From 0ac7d72614cf3a5b02eedca45f1e9ed898dea358 Mon Sep 17 00:00:00 2001 From: Ahmed Markhoos <ahmed.markhoos@cern.ch> Date: Mon, 10 Jul 2023 16:03:37 +0200 Subject: [PATCH] Update HPO docs, requirements and fix a few bugs --- docs/HPO.ipynb | 55 +++++++++++++++++++++++++++++++++---------- freeforestml/model.py | 13 ++++++---- requirements.txt | 1 + setup.py | 1 + 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/docs/HPO.ipynb b/docs/HPO.ipynb index ae8f6fa..ee00122 100644 --- a/docs/HPO.ipynb +++ b/docs/HPO.ipynb @@ -47,6 +47,13 @@ "df = toydata.get()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prepare the model and the hyperparameters" ] + }, { "cell_type": "code", "execution_count": null, @@ -87,8 +94,8 @@ "outputs": [], "source": [ "def model(hp):\n", - " hp_momentum = hp.Float('momentum', 0.0, 1.0, 0.05)\n", - " hp_rho = hp.Float('rho', 0.0, 1.0, 0.05)\n", + " hp_momentum = hp.Float('momentum', 0.0, 1.0, 0.25)\n", + " hp_rho = hp.Float('rho', 0.0, 1.0, 0.25)\n", " \n", " m = Sequential()\n", " m.add(Dense(units=15, activation='relu', input_dim=len(input_var)))\n", @@ -103,8 +110,9 @@ "\n", "cv = ClassicalCV(3, frac_var='random')\n", "\n", - "net = HepNetSearch(model, 'RandomSearch', cv, EstimatorNormalizer, input_var, output_var)\n", - "net.set_tuner(objective='val_categorical_accuracy', project_name='fold', max_trials=1000, seed=123)" + "net = HepNetSearch(model, 'GridSearch', cv, EstimatorNormalizer, input_var, output_var)\n", + "net.set_tuner(objective='val_categorical_accuracy', project_name='fold', \n", + " seed=123, overwrite=True)" ] }, { @@ -117,6 +125,13 @@ "ztt_wf = len(p_ztt.selection(df).weight) / p_ztt.selection(df).weight.sum()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Train/search over the hyperparameter space" ] + }, { "cell_type": "code", "execution_count": null, @@ -124,7 +139,14 @@ "outputs": [], "source": [ "net.search(df.compute(), epochs=20, verbose=2, 
batch_size=2048,\n", - " weight=Variable(\"weight\", lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf)), Nfmp=False)" + " weight=Variable(\"weight\", lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HPO score book, sorted by best" ] }, { @@ -133,7 +155,14 @@ "metadata": {}, "outputs": [], "source": [ - "net.search_book" + "net.book(sort=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Best score architecture and hyperparater values" ] }, { @@ -142,7 +171,7 @@ "metadata": {}, "outputs": [], "source": [ - "net.book(sort=True)" + "net.trial_summary()" ] }, { @@ -167,7 +196,7 @@ "metadata": {}, "outputs": [], "source": [ - "book_pivoted = book.pivot('momentum','rho','mean')*100" + "book_pivoted = book.pivot(index='momentum', columns='rho', values='mean')*100" ] }, { @@ -188,8 +217,8 @@ "source": [ "figure(figsize=(0.8*8, 0.8*6), dpi=100)\n", "plt.scatter(book['std'], book['mean'], label='RandomSearch HPO', alpha=0.7)\n", - "plt.xlabel('Fold std $\\sigma$')\n", - "plt.ylabel('Fold Mean $\\mu$')\n", + "plt.xlabel('Score $\\sigma$')\n", + "plt.ylabel('Score $\\mu$')\n", "atlasify('Internal')" ] }, @@ -203,9 +232,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python3.8 FFML", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python38-ffml" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -217,7 +246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/freeforestml/model.py b/freeforestml/model.py index 3ef7179..c48bdf4 100644 --- a/freeforestml/model.py +++ b/freeforestml/model.py @@ -1111,7 +1111,10 @@ class HepNetSearch: trial_position = None for i in range(oracle_trials): - if hps_dict == hps_trials[i].values: + #remove non-hps items before comparison + hps_trial = 
hps_trials[i].values + hps_trial = { key:hps_trial[key] for key, value in hps_dict.items() } + if hps_dict == hps_trial: trial_position = i break @@ -1261,12 +1264,12 @@ class HepNetSearch: print('Warning: Objective direction is neither max nor min! Defaulted to descending order for optimal trial mean.') - ###TODO: The following is a work arround for dropped trials. It is in fact a performance issue - ###1-It affects run time 2-It might affect the possibility of reaching the optimal hyperparameter value - + ###TODO: The following is a work around dropped trials. It is indeed a performance issue. + ###1-It affects run time 2-It affects the optimal hyperparameter set + ###Currently, the best course of action is to seed the models identically #Remove tuner dropped trials if filter_nan: - dropped_trial_indices = self.search_book[self.search_book.isnull().any(1)].index.tolist() + dropped_trial_indices = self.search_book[self.search_book.isnull().any(axis=1)].index.tolist() search_book = self.search_book.drop(dropped_trial_indices) else: search_book = self.search_book diff --git a/requirements.txt b/requirements.txt index a1822f3..5a7434d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ dill h5py numpy scipy +scikit-learn matplotlib seaborn tables diff --git a/setup.py b/setup.py index fb62e9d..629eba0 100644 --- a/setup.py +++ b/setup.py @@ -40,4 +40,5 @@ setup(name='freeforestml', "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Physics"]) -- GitLab