diff --git a/docs/HPO.ipynb b/docs/HPO.ipynb index ae8f6faf4d64160a3d3a2cdf5b35b3f041613ee7..ee00122b175c51bc37c854fb93b13e732d44483b 100644 --- a/docs/HPO.ipynb +++ b/docs/HPO.ipynb @@ -47,6 +47,13 @@ "df = toydata.get()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prepare the model and the hyperparameters" + ] + }, { "cell_type": "code", "execution_count": null, @@ -87,8 +94,8 @@ "outputs": [], "source": [ "def model(hp):\n", - " hp_momentum = hp.Float('momentum', 0.0, 1.0, 0.05)\n", - " hp_rho = hp.Float('rho', 0.0, 1.0, 0.05)\n", + " hp_momentum = hp.Float('momentum', 0.0, 1.0, 0.25)\n", + " hp_rho = hp.Float('rho', 0.0, 1.0, 0.25)\n", " \n", " m = Sequential()\n", " m.add(Dense(units=15, activation='relu', input_dim=len(input_var)))\n", @@ -103,8 +110,9 @@ "\n", "cv = ClassicalCV(3, frac_var='random')\n", "\n", - "net = HepNetSearch(model, 'RandomSearch', cv, EstimatorNormalizer, input_var, output_var)\n", - "net.set_tuner(objective='val_categorical_accuracy', project_name='fold', max_trials=1000, seed=123)" + "net = HepNetSearch(model, 'GridSearch', cv, EstimatorNormalizer, input_var, output_var)\n", + "net.set_tuner(objective='val_categorical_accuracy', project_name='fold', \n", + " seed=123, overwrite=True)" ] }, { @@ -117,6 +125,13 @@ "ztt_wf = len(p_ztt.selection(df).weight) / p_ztt.selection(df).weight.sum()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "train/search over the hyperparameter space" + ] + }, { "cell_type": "code", "execution_count": null, @@ -124,7 +139,14 @@ "outputs": [], "source": [ "net.search(df.compute(), epochs=20, verbose=2, batch_size=2048,\n", - " weight=Variable(\"weight\", lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf)), Nfmp=False)" + " weight=Variable(\"weight\", lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HPO score book, sorted by best" ] }, { @@ -133,7 +155,14 @@ 
"metadata": {}, "outputs": [], "source": [ - "net.search_book" + "net.book(sort=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Best score architecture and hyperparater values" ] }, { @@ -142,7 +171,7 @@ "metadata": {}, "outputs": [], "source": [ - "net.book(sort=True)" + "net.trial_summary()" ] }, { @@ -167,7 +196,7 @@ "metadata": {}, "outputs": [], "source": [ - "book_pivoted = book.pivot('momentum','rho','mean')*100" + "book_pivoted = book.pivot(index='momentum', columns='rho', values='mean')*100" ] }, { @@ -188,8 +217,8 @@ "source": [ "figure(figsize=(0.8*8, 0.8*6), dpi=100)\n", "plt.scatter(book['std'], book['mean'], label='RandomSearch HPO', alpha=0.7)\n", - "plt.xlabel('Fold std $\\sigma$')\n", - "plt.ylabel('Fold Mean $\\mu$')\n", + "plt.xlabel('Score $\\sigma$')\n", + "plt.ylabel('Score $\\mu$')\n", "atlasify('Internal')" ] }, @@ -203,9 +232,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python3.8 FFML", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python38-ffml" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -217,7 +246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/freeforestml/model.py b/freeforestml/model.py index 3ef7179a308b8c48243ca80c78a7b408133ca766..c48bdf4c9e126e3a011de449de7eebdd0afa477e 100644 --- a/freeforestml/model.py +++ b/freeforestml/model.py @@ -1111,7 +1111,10 @@ class HepNetSearch: trial_position = None for i in range(oracle_trials): - if hps_dict == hps_trials[i].values: + #remove non-hps items before comparison + hps_trial = hps_trials[i].values + hps_trial = { key:hps_trial[key] for key, value in hps_dict.items() } + if hps_dict == hps_trial: trial_position = i break @@ -1261,12 +1264,12 @@ class HepNetSearch: print('Warning: Objective direction is neither max nor min! 
Defaulted to descending order for optimal trial mean.') - ###TODO: The following is a work arround for dropped trials. It is in fact a performance issue - ###1-It affects run time 2-It might affect the possibility of reaching the optimal hyperparameter value - + ###TODO: The following is a workaround for dropped trials. It is indeed a performance issue. + ###1-It affects run time 2-It affects the optimal hyperparameter set + ###Currently, the best course of action is to seed the models identically #Remove tuner dropped trials if filter_nan: - dropped_trial_indices = self.search_book[self.search_book.isnull().any(1)].index.tolist() + dropped_trial_indices = self.search_book[self.search_book.isnull().any(axis=1)].index.tolist() search_book = self.search_book.drop(dropped_trial_indices) else: search_book = self.search_book diff --git a/requirements.txt b/requirements.txt index a1822f39e3dc830098bbcc9dbafa2f5136a39990..5a7434db5c443c5347aa9a1d8c3521c1a7f3b705 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ dill h5py numpy scipy +scikit-learn matplotlib seaborn tables diff --git a/setup.py b/setup.py index fb62e9d8b2f75d8c16db3880d8d463f69a22bf9e..629eba01271854ffe0b8b8343ed851e173abd36c 100644 --- a/setup.py +++ b/setup.py @@ -40,4 +40,5 @@ setup(name='freeforestml', "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Physics"])