Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • cern/fsauerbu/FreeForestML
1 result
Show changes
Commits on Source (22)
Showing with 1101 additions and 1119 deletions
......@@ -5,3 +5,4 @@ __pycache__
*.h5
doc/*
.eggs/
examples/*.html
......@@ -18,7 +18,7 @@ unittest:
- pip install -r requirements.txt
- python setup.py test
doc_build:
build:doc:
stage: build
image: python:3.7
script:
......@@ -33,19 +33,46 @@ doc_build:
- _public
expire_in: 1 mos
deploy:
build:examples:
stage: build
image: python:3.7
script:
- python setup.py install
- cd examples
- pip install -r requirements.txt
- jupyter-nbconvert --ExecutePreprocessor.timeout=300 --execute --to html *.ipynb
artifacts:
paths:
- examples/*.html
- examples/*.ipynb
expire_in: 1 mos
deploy:examples:
stage: deploy
only:
- master
dependencies:
- build:examples
variables:
"CI_WEBSITE_DIR": "examples/"
"DFS_WEBSITE_DIR": "examples/"
image: gitlab-registry.cern.ch/ci-tools/ci-web-deployer
script:
- deploy-dfs
deploy:doc:
stage: deploy
only:
- master
dependencies:
- doc_build
- build:doc
variables:
"CI_WEBSITE_DIR": "_public/"
image: gitlab-registry.cern.ch/ci-tools/ci-web-deployer
script:
- deploy-dfs
build_master:
build:docker:master:
stage: build
variables:
DOCKER_FILE: Dockerfile
......@@ -57,7 +84,7 @@ build_master:
tags:
- docker-image-build
build_commit:
build:docker:commit:
stage: build
variables:
DOCKER_FILE: Dockerfile
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -5,11 +5,12 @@ Pure Python framework to train neural networks for high energy physics analysis.
Examples
--------
* `Histogram <Histogram.ipynb>`_
* `HistogramFactory <HistogramFactory.ipynb>`_
* `ConfusionMatrix <ConfusionMatrix.ipynb>`_
* `ROC <ROC.ipynb>`_
* `ToyData <https://nnfwtbn.web.cern.ch/examples/ToyData.html>`_
* `Histogram <https://nnfwtbn.web.cern.ch/examples/Histogram.html>`_
* `HistogramFactory <https://nnfwtbn.web.cern.ch/examples/HistogramFactory.html>`_
* `ConfusionMatrix <https://nnfwtbn.web.cern.ch/examples/ConfusionMatrix.html>`_
* `ROC <https://nnfwtbn.web.cern.ch/examples/ROC.html>`_
* `Classification <https://nnfwtbn.web.cern.ch/examples/Classification.html>`_
Links
......
This diff is collapsed.
%% Cell type:code id: tags:
``` python
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from nnfwtbn import Variable, Process, Cut, \
HepNet, ClassicalCV, EstimatorNormalizer, \
HistogramFactory, confusion_matrix, atlasify
from nnfwtbn import toydata
```
%% Cell type:code id: tags:
``` python
df = toydata.get()
```
%% Cell type:code id: tags:
``` python
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
p_sig = Process(r"Signal", range=(1, 1))
```
%% Cell type:code id: tags:
``` python
hist_factory = HistogramFactory(df, stacks=[[p_sig, p_ztt]],
weight="weight", color=[sns.color_palette()])
```
%% Cell type:markdown id: tags:
# Cut-based
%% Cell type:code id: tags:
``` python
hist_factory(Variable("$\Delta \eta^{jj}$",
lambda d: (d.jet_1_eta - d.jet_2_eta).abs()),
bins=20, range=(0, 8))
hist_factory(Variable("$m^{jj}$", "m_jj"),
bins=20, range=(0, 1500))
None
```
%% Cell type:code id: tags:
``` python
c_sr = Cut(lambda d: d.m_jj > 400) & \
Cut(lambda d: d.jet_2_pt >= 30) & \
Cut(lambda d: d.jet_1_eta * d.jet_2_eta < 0) & \
Cut(lambda d: (d.jet_2_eta - d.jet_1_eta).abs() > 3)
c_sr.label = "Signal"
c_rest = (~c_sr)
c_rest.label = "Rest"
```
%% Cell type:code id: tags:
``` python
confusion_matrix(df, [p_sig, p_ztt], [c_sr, c_rest],
x_label="Signal", y_label="Region", annot=True, weight="weight")
confusion_matrix(df, [p_sig, p_ztt], [c_sr, c_rest], normalize_rows=True,
x_label="Signal", y_label="Region", annot=True, weight="weight")
None
```
%% Cell type:markdown id: tags:
# Neural Network
%% Cell type:code id: tags:
``` python
df['dijet_deta'] = (df.jet_1_eta - df.jet_2_eta).abs()
df['dijet_prod_eta'] = (df.jet_1_eta * df.jet_2_eta)
input_var = ['dijet_prod_eta', 'm_jj', 'dijet_deta', 'higgs_pt', 'jet_2_pt', 'jet_1_eta', 'jet_2_eta', 'tau_eta']
output_var = ['is_sig', 'is_ztt']
```
%% Cell type:code id: tags:
``` python
df["is_sig"] = p_sig.selection.idx_array(df)
df["is_ztt"] = p_ztt.selection.idx_array(df)
```
%% Cell type:code id: tags:
``` python
sns.pairplot(df.sample(n=1000), vars=input_var, hue="is_sig")
None
```
%% Cell type:code id: tags:
``` python
def model():
m = Sequential()
m.add(Dense(units=15, activation='relu', input_dim=len(input_var)))
m.add(Dense(units=5, activation='relu'))
m.add(Dense(units=2, activation='softmax'))
m.compile(loss='categorical_crossentropy',
optimizer=SGD(lr=0.1),
metrics=['categorical_accuracy'])
return m
df['random'] = toydata.rng.random(size=len(df))
cv = ClassicalCV(5, frac_var='random')
net = HepNet(model, cv, EstimatorNormalizer, input_var, output_var)
```
%% Cell type:code id: tags:
``` python
sig_wf = len(p_sig.selection(df).weight) / p_sig.selection(df).weight.sum()
ztt_wf = len(p_ztt.selection(df).weight) / p_ztt.selection(df).weight.sum()
```
%% Cell type:code id: tags:
``` python
net.fit(df, epochs=150, verbose=0, batch_size=2048,
weight=Variable("weight", lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf)))
```
%% Cell type:code id: tags:
``` python
sns.lineplot(x='epoch', y='loss', data=net.history, label="Training")
sns.lineplot(x='epoch', y='val_loss', data=net.history, label="Validation")
plt.ylabel("loss")
atlasify("Internal")
None
```
%% Cell type:markdown id: tags:
## Accuracy
%% Cell type:code id: tags:
``` python
sns.lineplot(x='epoch', y='categorical_accuracy', data=net.history, label="Training")
sns.lineplot(x='epoch', y='val_categorical_accuracy', data=net.history, label="Validation")
plt.ylabel("Accuracy")
atlasify("Internal")
None
```
%% Cell type:code id: tags:
``` python
sns.lineplot(x='epoch', y='val_categorical_accuracy', data=net.history, hue="fold")
atlasify("Internal", enlarge=1.6)
None
```
%% Cell type:code id: tags:
``` python
out = net.predict(df, cv='test')
out['pred_sig'] = out.pred_is_sig >= 0.5
```
%% Cell type:code id: tags:
``` python
c_pred_sig = Process("Signal", lambda d: d.pred_is_sig >= 0.5)
c_pred_ztt = Process(r"$Z\rightarrow\tau\tau$", lambda d: d.pred_is_sig < 0.5)
confusion_matrix(out, [p_sig, p_ztt], [c_pred_sig, c_pred_ztt],
x_label="Truth", y_label="Classification", annot=True, weight="weight")
confusion_matrix(out, [p_sig, p_ztt], [c_pred_sig, c_pred_ztt], normalize_rows=True,
x_label="Truth", y_label="Classification", annot=True, weight="weight")
None
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
import pandas as pd
import seaborn as sns
from nnfwtbn import Variable, Process, Cut, confusion_matrix, HistogramFactory
from nnfwtbn import toydata
```
%% Cell type:code id: tags:
``` python
df = toydata.get()
```
%% Cell type:code id: tags:
``` python
p_sig = Process(r"Signal", range=(1, 1))
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
```
%% Cell type:code id: tags:
``` python
c_low = Cut(lambda d: d.m_jj < 350, label="Low $m^{jj}$")
c_mid = Cut(lambda d: (d.m_jj >= 350) & (d.m_jj < 600), label="Mid $m^{jj}$")
c_high = Cut(lambda d: d.m_jj > 600, label="High $m^{jj}$")
```
%% Cell type:markdown id: tags:
## Normalized columns
%% Cell type:code id: tags:
``` python
confusion_matrix(df, [p_sig, p_ztt], [c_low, c_mid, c_high],
y_label="Region", x_label="Truth Signal", annot=True, weight="weight")
None
```
%% Cell type:markdown id: tags:
## Normalized rows
%% Cell type:code id: tags:
``` python
confusion_matrix(df, [p_sig, p_ztt], [c_low, c_mid, c_high], normalize_rows=True,
y_label="Region", x_label="Truth Signal", annot=True, weight="weight")
None
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
import pandas as pd
import seaborn as sns
from nnfwtbn import Variable, Process, Cut, hist
from nnfwtbn import toydata
```
%% Cell type:code id: tags:
``` python
df = toydata.get()
```
%% Cell type:code id: tags:
``` python
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
p_sig = Process(r"Signal", range=(1, 1))
p_asimov = Process(r"Asimov", selection=lambda d: d.fpid >= 0)
```
%% Cell type:code id: tags:
``` python
v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
```
%% Cell type:code id: tags:
``` python
c_vbf = Cut(lambda d: d.dijet_p4__M > 400) & \
Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
Cut(lambda d: d.is_dijet_centrality == 1) & \
Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
```
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
weight="weight", color=[sns.color_palette(), ['black']],
histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / SM")
None
```
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
weight="weight", color=[sns.color_palette(), ['black']], y_log=True,
histtype=[['stepfilled']*4 + ['step'], 'points'], numerator=None)
None
```
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
weight="weight", color=[sns.color_palette(), ['black']],
y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="MC / Data", numerator=0, denominator=1)
None
```
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(40, 120), selection=None,
weight="weight", color=[sns.color_palette(), ['black']],
y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
numerator=[p_asimov], denominator=[p_ztt],
histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / Bkg")
None
```
%% Cell type:code id: tags:
``` python
hist(df, v_higgs_m, 20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(40, 120), selection=None,
weight="weight", color=[sns.color_palette(), ['black']],
y_log=True, y_min=1e-1, vlines=[80, {'x': 100, 'color': 'b'}],
numerator=[p_asimov], denominator=[p_ztt], diff=True,
histtype=[['stepfilled']*4 + ['step'], 'points'], ratio_label="Data / Bkg")
None
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
import pandas as pd
import seaborn as sns
from nnfwtbn import Variable, Process, Cut, hist, HistogramFactory
from nnfwtbn import toydata
```
%% Cell type:code id: tags:
``` python
df = toydata.get()
```
%% Cell type:code id: tags:
``` python
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
p_sig = Process(r"Signal", range=(1, 1))
p_asimov = Process(r"Asimov", selection=lambda d: d.fpid >= 0)
```
%% Cell type:code id: tags:
``` python
c_vbf = Cut(lambda d: d.dijet_p4__M > 400) & \
Cut(lambda d: d.jet_1_p4__Pt >= 30) & \
Cut(lambda d: d.is_dijet_centrality == 1) & \
Cut(lambda d: d.jet_0_p4__Eta * df.jet_1_p4__Eta < 0) & \
Cut(lambda d: (d.jet_0_p4__Eta - df.jet_1_p4__Eta).abs() > 3)
```
%% Cell type:code id: tags:
``` python
hist_factory = HistogramFactory(df, bins=20, stacks=[[p_ztt, p_sig], [p_asimov]], range=(0, 200), selection=None,
weight="weight", color=[sns.color_palette(), ['black']],
histtype=[['stepfilled']*4 + ['step'], 'points'])
None
```
%% Cell type:code id: tags:
``` python
v_mmc = Variable(r"$m^H$", "higgs_m", "GeV")
hist_factory(v_mmc)
None
```
%% Cell type:code id: tags:
``` python
v_tau_pT = Variable(r"$p_\mathrm{T}{\tau}$", "tau_pt", "GeV")
hist_factory(v_tau_pT, bins=12, range=(0, 120))
None
```
%% Cell type:code id: tags:
``` python
v_lep_pT = Variable(r"$p_\mathrm{T}{\ell}$", "lep_pt", "GeV")
hist_factory(v_lep_pT, bins=12, range=(0, 120))
None
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
import pandas as pd
import seaborn as sns
from nnfwtbn import Variable, Process, Cut, roc
from nnfwtbn import toydata
```
%% Cell type:code id: tags:
``` python
df = toydata.get()
df['noise'] = df.fpid + toydata.rng.normal(0, 0.3, size=len(df))
```
%% Cell type:code id: tags:
``` python
p_bkg = Process(r"Background", range=(0, 0))
p_sig = Process(r"Signal", range=(1, 1))
```
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
v_higgs_m = Variable(r"$m^H$", "higgs_m", "GeV")
roc(df, p_sig, p_bkg, v_higgs_m, axes=ax, steps=400)
roc(df, p_sig, p_bkg, Variable("Noise ID", "noise"), axes=ax)
None
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
from nnfwtbn import Variable, Process, Cut, hist
from nnfwtbn.toydata import generate, proposal, mcmc_step, vbfh_pdf, mcmc, ztt_pdf
from pylorentz import Momentum4
import numpy as np
import pandas as pd
import seaborn as sns
```
%% Cell type:code id: tags:
``` python
# shuffle=False allows plotting the MC walk
%time df = generate(10000, vbfh_frac=0.5, shuffle=False)
```
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
```
%% Cell type:markdown id: tags:
# Markov chain walk
%% Cell type:code id: tags:
``` python
idx = df.fpid == 1
fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].set_title("VBFH")
ax[0].plot(np.arange(sum(idx)), df.jet_1_pt[idx], label="Jet 1")
ax[0].plot(np.arange(sum(idx)), df.jet_2_pt[idx], label="Jet 2")
ax[0].set_ylabel(r"$p_{\mathrm{T}}$")
ax[0].legend()
ax[1].set_title(r"$Z\rightarrow\tau\tau$")
ax[1].plot(np.arange(sum(~idx)), df.jet_1_pt[~idx], label="Jet 1")
ax[1].plot(np.arange(sum(~idx)), df.jet_2_pt[~idx], label="Jet 2")
ax[1].set_ylabel(r"$p_{\mathrm{T}}$")
ax[1].legend()
fig.tight_layout()
None
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].set_title("VBFH")
ax[0].plot(np.arange(sum(idx)), df.jet_1_eta[idx].abs(), label="Jet 1")
ax[0].plot(np.arange(sum(idx)), df.jet_2_eta[idx].abs(), label="Jet 2")
ax[0].set_ylabel(r"$|\eta_j|$")
ax[0].legend()
ax[1].set_title(r"$Z\rightarrow\tau\tau$")
ax[1].plot(np.arange(sum(~idx)), df.jet_1_eta[~idx].abs(), label="Jet 1")
ax[1].plot(np.arange(sum(~idx)), df.jet_2_eta[~idx].abs(), label="Jet 2")
ax[1].set_ylabel(r"$|\eta_j|$")
ax[1].legend()
fig.tight_layout()
None
```
%% Cell type:code id: tags:
``` python
fig, ax = plt.subplots(1, 1)
ax.plot(np.arange(sum(idx)), df.higgs_m[idx], label="VBFH")
ax.plot(np.arange(sum(~idx)), df.higgs_m[~idx], label=r"$Z\rightarrow\tau\tau$")
ax.set_ylabel(r"$m^H$")
ax.legend()
fig.tight_layout()
None
```
%% Cell type:markdown id: tags:
# Distributions
%% Cell type:code id: tags:
``` python
p_vbfh = Process("VBFH", range=(1, 1))
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
```
%% Cell type:code id: tags:
``` python
jet_1_eta = Variable(r"$\eta^{j_1}$", "jet_1_eta")
```
%% Cell type:code id: tags:
``` python
hist(df, jet_1_eta, bins=20, range=(-6, 6), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
jet_2_eta = Variable(r"$\eta^{j_2}$", "jet_2_eta")
hist(df, jet_2_eta, bins=20, range=(-6, 6), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
delta_eta_jets = Variable(r"$|\Delta\eta{jj}|$", lambda d: d.jet_1_eta.abs())
hist(df, delta_eta_jets, bins=22, range=(-1, 10), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
jet_1_pt = Variable(r"$p_{\mathrm{T}}^{j_1}$", "jet_1_pt")
jet_2_pt = Variable(r"$p_{\mathrm{T}}^{j_2}$", "jet_2_pt")
hist(df, jet_1_pt, bins=50, range=(0, 1000), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
hist(df, jet_2_pt, bins=25, range=(0, 250), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_higgs_m = Variable(r"$m^H$", "higgs_m")
hist(df, v_higgs_m, bins=20, range=(0, 200), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_weight = Variable(r"$w$", "weight")
hist(df, v_weight, bins=20, range=(0, 2), stacks=[[p_ztt, p_vbfh]],
weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_mjj = Variable(r"$m^{jj}$", "m_jj")
hist(df, v_mjj, bins=51, range=(-40, 2000), stacks=[[p_ztt, p_vbfh]],
weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
hist(df, v_mjj, bins=51, range=(-40, 2000), stacks=[[p_vbfh]],
weight=lambda d: d.weight * 1 + 0, color=[sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_lep_pt = Variable(r"$p^{\ell}_{\mathrm{T}}$", "lep_pt")
hist(df, v_lep_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_lep_eta = Variable(r"$|\eta^{\ell}|$", "lep_eta")
hist(df, v_lep_eta, bins=30, range=(-5, 5), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_tau_pt = Variable(r"$p^{\tau}_{\mathrm{T}}$", "tau_pt")
hist(df, v_tau_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_tau_eta = Variable(r"$|\eta^{\tau}|$", "tau_eta")
hist(df, v_tau_eta, bins=30, range=(-5, 5), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_met_pt = Variable(r"$E^{\mathrm{miss}}_{\mathrm{T}}$", "met_pt")
hist(df, v_met_pt, bins=30, range=(-10, 290), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_lep_cent = Variable(r"$\eta^{\ell}$", "tau_centrality")
hist(df, v_lep_cent, bins=22, range=(-0.05, 1.05), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
v_tau_cent = Variable(r"$\eta^{\tau}$", "tau_centrality")
hist(df, v_tau_cent, bins=22, range=(-0.05, 1.05), stacks=[[p_ztt, p_vbfh]],
weight="weight", color=[sns.color_palette("Blues")[:1] + sns.color_palette()[1:]],
histtype=[['stepfilled']*5])
None
```
%% Cell type:code id: tags:
``` python
```
keras
tensorflow
jupyter
import unittest
import numpy as np
from nnfwtbn.toydata import draw
SEED = 798088218969
class DrawTestCase(unittest.TestCase):
"""
Test the implementation of draw().
"""
@staticmethod
def _rng(seed=SEED):
"""
Returns a new random number generator.
"""
return np.random.Generator(np.random.PCG64(seed))
def setUp(self):
"""
Instantiate a random number generator.
"""
self.rng = DrawTestCase._rng()
@staticmethod
def _toy_pdf(x):
"""
A toy PDF for testing: Normalized parabola between 0 and 1
"""
return 3 * x**2
@staticmethod
def _toy_pdf2(x):
"""
A toy PDF for testing: Normalized parabola between 1 and 11
"""
return 3 * (x - 1)**2 / 1000
def test_draw_len(self):
"""
Check that draw returns the number of samples given by the size
parameter.
"""
self.assertEqual(len(draw(self.rng, DrawTestCase._toy_pdf)), 1)
self.assertEqual(len(draw(self.rng, DrawTestCase._toy_pdf, size=10)), 10)
self.assertEqual(draw(self.rng, DrawTestCase._toy_pdf, size=(2, 5)).shape,
(2, 5))
def test_draw_reproducible(self):
"""
Check that draw returns the same array when called with identical
arguments.
"""
rng1 = DrawTestCase._rng()
rng2 = DrawTestCase._rng()
sample1 = draw(rng1, DrawTestCase._toy_pdf, size=(10, 100))
sample2 = draw(rng2, DrawTestCase._toy_pdf, size=(10, 100))
self.assertTrue((sample1 == sample2).all())
def test_draw_seed(self):
"""
Check that different arrays are returned when different seeds are
given.
"""
rng1 = DrawTestCase._rng(42)
rng2 = DrawTestCase._rng(43)
sample1 = draw(rng1, DrawTestCase._toy_pdf, size=(10, 100))
sample2 = draw(rng2, DrawTestCase._toy_pdf, size=(10, 100))
self.assertFalse((sample1 == sample2).all())
def test_draw_limits(self):
"""
Check that the returned numbers are withing the limit.
"""
sample = draw(self.rng, DrawTestCase._toy_pdf, size=1000*1000)
self.assertGreaterEqual(sample.min(), 0)
self.assertLess(sample.min(), 0.01)
self.assertGreater(sample.max(), 0.9999)
self.assertLessEqual(sample.max(), 1)
def test_draw_limits_2(self):
"""
Check that the returned numbers are withing the limit.
"""
sample = draw(self.rng, DrawTestCase._toy_pdf2, lower=1, upper=11, size=1000*1000)
self.assertGreaterEqual(sample.min(), 1)
self.assertLess(sample.min(), 1.1)
self.assertGreater(sample.max(), 10.999)
self.assertLessEqual(sample.max(), 11)
"""
This module implements method to generate a deterministic, physics-inspired
toy dataset. The dataset is intended for documentations and examples. The
module does not rely on external random number generators (seeding numpy
might break user code).
"""
import math
import numpy as np
from scipy.integrate import quad
from scipy.interpolate import interp1d
import pandas as pd
from pylorentz import Momentum4
def draw(rng, pdf, size=1, lower=0, upper=1, N=100):
"""
Draws a size-shaped random sample from the given PDF. The PDF must be
normalized to unity withing the given limits.
"""
grid = np.linspace(lower, upper, N)
cdf = np.array([quad(pdf, lower, x)[0] for x in grid])
inv_cdf = interp1d(cdf, grid, bounds_error=False,
fill_value=(0, 1))
ys = rng.uniform(size=size)
return inv_cdf(ys)
rng = np.random.RandomState(2221006818) # random seed
def augment(point):
jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt = point
jet_1 = Momentum4.m_eta_phi_pt(0, jet_1_eta, jet_1_phi, jet_1_pt)
jet_2 = Momentum4.m_eta_phi_pt(0, jet_2_eta, jet_2_phi, jet_2_pt)
tau = Momentum4.m_eta_phi_pt(1.8, tau_eta, tau_phi, tau_pt)
lep = Momentum4.m_eta_phi_pt(0.1, lep_eta, lep_phi, lep_pt)
met = Momentum4.m_eta_phi_pt(0.0, 0, met_phi, met_pt)
lep_centrality = np.exp(- 4 / (jet_1_eta - jet_2_eta)**2 * (lep_eta * (jet_1_eta + jet_2_eta)/2)**2)
tau_centrality = np.exp(- 4 / (jet_1_eta - jet_2_eta)**2 * (tau_eta * (jet_1_eta + jet_2_eta)/2)**2)
higgs = tau + lep + met
return jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt, \
higgs.p_t, higgs.eta, higgs.phi, higgs.m, (jet_1 + jet_2).m, \
lep_centrality, tau_centrality
def generate(total, vbfh_frac=0.2, shuffle=True):
n_vbfh = math.ceil(vbfh_frac * total)
n_ztt = total - n_vbfh
df_vbfh = mcmc(n_vbfh, vbfh_pdf)
df_vbfh['fpid'] = 1
df_vbfh['weight'] = 1
df_ztt = mcmc(n_ztt, ztt_pdf)
df_ztt['fpid'] = 0
df_ztt['weight'] = 1
df = pd.concat([df_vbfh, df_ztt])
if shuffle:
df = df.sample(frac=1, random_state=rng)
df.reset_index(drop=True, inplace=True)
return df
def vbfh_pdf(point):
"""
Returns the relative probability density at the given point. The function
is not properly normalized. The outer dimension of the point contains the
following values:
- jet_1_pt
- jet_1_eta
- jet_1_phi
- jet_2_pt
- jet_2_eta
- jet_2_phi
- met_phi
- met_pt
- tau_phi
- tau_eta
- tau_pt
- lep_phi
- lep_eta
- lep_pt
"""
overall = 1.
point = augment(point)
jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt, \
higgs_pt, higgs_eta, higgs_phi, higgs_m, m_jj, lep_centrality, \
tau_centrality = point
# jet system
overall *= (jet_1_pt > jet_2_pt)
overall *= abs(jet_1_eta - jet_2_eta)
overall *= abs(jet_1_eta) * np.exp(-abs(jet_1_eta)**2 / 9)
overall *= abs(jet_2_eta) * np.exp(-abs(jet_2_eta)**2 / 9)
overall *= (jet_1_pt > 25)
overall *= (jet_2_pt > 25)
overall *= 1 / (jet_1_pt)
overall *= 1 / (jet_2_pt)
# Higgs system
overall *= np.exp(-0.5 * (higgs_m - 125)**2 / 15**2)
overall *= 1 / (1 + np.exp((m_jj-1000) / 500))
overall *= 1 / (1 + np.exp((m_jj-1000) / 500))
overall *= (met_pt > 0)
overall *= (lep_pt > 10)
overall *= (tau_pt > 20)
overall *= np.exp(-lep_pt / 180)
overall *= np.exp(-tau_pt / 180)
overall *= np.exp(-met_pt / 50)
overall *= (lep_centrality >= 0)
overall *= (lep_centrality < 1)
overall *= lep_centrality / 2 + 0.5
overall *= (tau_centrality >= 0)
overall *= (tau_centrality < 1)
overall *= tau_centrality / 2 + 0.5
return overall
def ztt_pdf(point):
overall = 1.
point = augment(point)
jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt, \
higgs_pt, higgs_eta, higgs_phi, higgs_m, m_jj, lep_centrality, \
tau_centrality = point
# jet system
overall *= (jet_1_pt > jet_2_pt)
overall *= np.exp(-abs(jet_1_eta)**2 / 9)
overall *= np.exp(-abs(jet_2_eta)**2 / 9)
overall *= (jet_1_pt > 25)
overall *= (jet_2_pt > 25)
overall *= np.exp(-jet_1_pt / 100)
overall *= np.exp(-jet_2_pt / 60)
# Z system
overall *= np.exp(-0.5 * (higgs_m - 90)**2 / 10**2)
overall *= (met_pt > 0)
overall *= (lep_pt > 10)
overall *= (tau_pt > 20)
overall *= np.exp(-lep_pt / 150)
overall *= np.exp(-tau_pt / 150)
overall *= np.exp(-met_pt / 50)
overall *= (lep_centrality >= 0)
overall *= (lep_centrality < 1)
overall *= (1 - lep_centrality / 2)
overall *= (tau_centrality >= 0)
overall *= (tau_centrality < 1)
overall *= (1 - tau_centrality / 2)
overall *= np.exp(-m_jj / 200)
return overall
def proposal(point):
jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt = point
jet_1_pt = rng.normal(jet_1_pt, 10)
jet_2_pt = rng.normal(jet_2_pt, 10)
tau_pt = rng.normal(tau_pt, 10)
lep_pt = rng.normal(lep_pt, 10)
met_pt = rng.normal(met_pt, 10)
flip = rng.choice([1, -1])
jet_1_eta = rng.normal(jet_1_eta, 0.2) * flip
jet_2_eta = rng.normal(jet_2_eta, 0.2) * flip
tau_eta = rng.normal(tau_eta, 0.2) * flip
lep_eta = rng.normal(lep_eta, 0.2) * flip
rot = rng.uniform(0, 2* np.pi)
jet_1_phi = rng.normal(jet_1_phi + rot, 0.1) % (2 * np.pi)
jet_2_phi = rng.normal(jet_2_phi + rot, 0.1) % (2 * np.pi)
tau_phi = rng.normal(tau_phi + rot, 0.1) % (2 * np.pi)
lep_phi = rng.normal(lep_phi + rot, 0.1) % (2 * np.pi)
met_phi = rng.normal(met_phi + rot, 0.1) % (2 * np.pi)
return jet_1_pt, jet_1_eta, jet_1_phi, jet_2_pt, jet_2_eta, jet_2_phi, met_phi, \
met_pt, tau_phi, tau_eta, tau_pt, lep_phi, lep_eta, lep_pt
def mcmc_step(x, pdf):
"""
Perform a single step of Markov chain Monte Carlo.
"""
while True:
new_x = proposal(x)
alpha = pdf(new_x) / pdf(x)
u = rng.random()
if u <= alpha:
return new_x
def mcmc(length, pdf):
"""
Generate length many samples.
"""
point = 100, 1, 1, 50, 2, 1, 1, 1, 1, 1, 40, 1, 1, 30
for i in range(1000):
point = mcmc_step(point, pdf)
points = []
for i in range(length):
point = mcmc_step(point, pdf)
points.append(augment(point))
return pd.DataFrame(points,
columns=["jet_1_pt", "jet_1_eta", "jet_1_phi", "jet_2_pt", "jet_2_eta", "jet_2_phi",
"met_phi", "met_pt", "tau_phi", "tau_eta", "tau_pt", "lep_phi", "lep_eta",
"lep_pt", "higgs_pt", "higgs_eta", "higgs_phi", "higgs_m",
"m_jj", "lep_centrality", "tau_centrality"])
def get():
try:
df = pd.read_hdf(".toydata.h5", "main")
except FileNotFoundError:
print("Cannot find cached data. Recreating toy data. This might take "
"some time...")
df = generate(10000)
df.to_hdf(".toydata.h5", "main")
return df
......@@ -5,3 +5,4 @@ matplotlib
seaborn
tables
pandas
pylorentz
......@@ -18,7 +18,8 @@ setup(name='nnfwtbn',
"matplotlib",
"seaborn",
"tables",
"pandas"],
"pandas",
"pylorentz"],
test_suite='nnfwtbn.tests',
description='Experimental neural network framework to be named.',
url="https://gitlab.cern.ch/fsauerbu/nnfwtbn",
......