Skip to content
Snippets Groups Projects

Resolve "Integrate and interface to uhep"

Merged Frank Sauerburger requested to merge 62-integrate-and-interface-to-uhep into master
1 file
+ 196
2
Compare changes
  • Side-by-side
  • Inline
+ 196
2
@@ -9,6 +9,7 @@ import dask.dataframe as dd
import seaborn as sns
from atlasify import atlasify
import uhepp
from nnfwtbn.stack import Stack
from nnfwtbn.process import Process
@@ -60,7 +61,7 @@ def uhepplot(dataframe, variable, bins, stacks, selection=None,
weight=None, y_log=False, y_min=None, vlines=[],
denominator=0, numerator=-1, ratio_label=None, diff=False,
ratio_range=None, atlas=None, info=None, enlarge=1.6,
density=False, include_outside=False, **kwds):
density=False, include_outside=False, return_uhepp=False, **kwds):
"""
Creates a histogram of stacked processes. The first argument is the
dataframe to operate on. The 'variable' argument defines the x-axis. The
@@ -119,8 +120,201 @@ def uhepplot(dataframe, variable, bins, stacks, selection=None,
If the density argument is True, the area of each stack is normalized to
unity.
The method returns a Universal HEP plot object.
If return_uhepp is True, the method return a UHepPlot object.
"""
uhepp_obj = uhepp.UHepPlot()
uhepp_obj.producer = "nnfwtbn"
if (atlas is not False) or (info is not False):
uhepp_obj.atlas = True
atlas, info = fill_labels(atlas, info)
uhepp_obj.label = atlas
uhepp_obj.subtext = info
# Wrap column string by variable
if isinstance(variable, str):
variable = Variable(variable, variable)
uhepp_obj.symbol = variable.name
if variable.unit is not None:
uhepp_obj.unit = variable.unit
uhepp_obj.log = y_log
if weight is None:
weight = Variable("unity", lambda d: variable(d) * 0 + 1)
elif isinstance(weight, str):
weight = Variable(weight, weight)
squared_weight = Variable("squared weight", lambda d: weight(d)**2)
draw_ratio = (denominator is not None) and (numerator is not None)
# Disable ratio plots if there is only one stack
if len(stacks) == 1 and \
isinstance(denominator, int) and \
isinstance(numerator, int):
draw_ratio = False
# Handle selection
if selection is None:
selection = Cut(lambda d: variable(d) * 0 == 0)
elif not isinstance(selection, Cut):
selection = Cut(selection)
# Handle range/bins
if range is not None:
# Build bins
if not isinstance(bins, int):
raise err.InvalidBins("When range is given, bins must be int.")
if not isinstance(range, tuple) or len(range) != 2:
raise err.InvalidProcessSelection("Range argument must be a "
"tuple of two numbers.")
bins = np.linspace(range[0], range[1], bins + 1)
uhepp_obj.bin_edges = [float(x) for x in bins]
uhepp_obj.include_overflow = include_outside
uhepp_obj.include_overflow = include_outside
# Create separate under- and overflow bin
def pad(edges):
padded = [edges[0] - 1] + list(edges) + [edges[-1] + 1]
return np.asarray(padded)
bins = pad(bins)
if blind is None:
blind = []
elif isinstance(blind, Stack):
# Wrap scalar blind stack
blind = [blind]
# Handle stack
yields = {}
for i_stack, stack in enumerate(stacks):
uhepp_stack = uhepp.Stack()
uhepp_obj.stacks.append(uhepp_stack)
uhepp_stack.type = stack.get_histtype(0)
if stack in blind and variable.blinding is not None:
c_blind = variable.blinding(variable, bins)
else:
c_blind = lambda d: d
density_norm = 1.0
if density:
density_norm = stack.get_total(dataframe,
[float('-inf'), float('inf')],
variable, weight, include_outside).sum()
for i_process, process in enumerate(stack.processes):
histogram = stack.get_hist(c_blind(dataframe), i_process, bins,
variable, weight, include_outside)
histogram = histogram / density_norm
uncertainty = stack.get_total_uncertainty(c_blind(dataframe),
bins, variable,
weight,
include_outside)
uncertainty = uncertainty / density_norm
yield_ = {}
yield_["base"] = histogram
yield_["stat"] = uncertainty
process_id = f"s{i_stack}_p{i_process}"
yields[process_id] = yield_
uhepp_sitem = uhepp.StackItem()
uhepp_sitem.label = process.label
uhepp_sitem.style = stack.get_aux(i_process)
uhepp_sitem.yield_ = [process_id]
uhepp_stack.content.append(uhepp_sitem)
# Resolve numerator/denominator indices
if isinstance(numerator, int) and stacks[numerator] == stack:
numerator = stack
if isinstance(denominator, int) and stacks[denominator] == stack:
denominator= stack
if draw_ratio:
ratio = []
# Get denominator hist
if denominator in blind and variable.blinding is not None:
c_blind = variable.blinding(variable, bins)
else:
c_blind = lambda d: d
yield_ = {}
yield_["base"] = list(denominator.get_total(c_blind(dataframe), bins,
variable, weight,
include_outside))
yield_["stat"] = list(denominator.get_total_uncertainty(c_blind(dataframe),
bins, variable,
weight,
include_outside))
yields["den"] = yield_
# Process numerators
numerators = numerator
if not isinstance(numerators, (list, tuple)):
numerators = [numerators]
numerators_data = []
for i_numerator, numerator in enumerate(numerators):
if numerator in blind and variable.blinding is not None:
c_blind = variable.blinding(variable, bins)
else:
c_blind = lambda d: d
yield_ = {}
process_id = f"num_{i_numerator}"
yield_["base"] = numerator.get_total(c_blind(dataframe), bins,
variable, weight,
include_outside)
yield_["stat"] = numerator.get_total_uncertainty(c_blind(dataframe),
bins,
variable, weight,
include_outside)
yields[process_id] = yield_
uhepp_ritem = uhepp.RatioItem()
uhepp_ritem.numerator = [process_id]
uhepp_ritem.denominator = ["den"]
uhepp_ritem.style = {'markersize': 4, 'fmt': 'o'}
uhepp_ritem.style.update(numerator.get_aux(0))
histtype = numerator.get_histtype(0)
uhepp_ritem.type = "points" if histtype == "points" else "steps"
ratio.append(uhepp_ritem)
uhepp_obj.ratio = ratio
# Compute delayed dask
yields, = dask.compute(yields)
sterile_yields = {}
for key, item in yields.items():
sterile_item = {}
for item_key, sub_item in item.items():
sterile_item[item_key] = [float(x) for x in sub_item]
sterile_yields[key] = uhepp.Yield(sterile_item)
# TODO: Reorder processes if y_log
uhepp_obj.yields = sterile_yields
# TODO: vertical and horizontal lines
if return_uhepp:
return uhepp_obj
def hist(dataframe, variable, bins, stacks, selection=None,
Loading