Skip to content
Snippets Groups Projects
Commit b5dbfb02 authored by Wachter, Christoph's avatar Wachter, Christoph
Browse files

(CW) added (experimental) script to optimize hyperparameters

parent f53da738
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# Copyright (C) 2024 Oliver T. Hofmann
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
#
# If you are using this software for scientific purposes, please cite it as:
# L Hoermann et al., Computer Physics Communications (2019), 143-155
import itertools
import numpy as np
import multiprocessing
from sample import HyperParameters, PairwiseFeaturesData, DiscreteBayesianLearner
from sample.helpers.Utilities import trainLearner, createNonInteractingPriorMean
def apply_kwargs(fn, kwargs):
    """Call ``fn`` with the entries of ``kwargs`` as keyword arguments.

    Kept at module level so that it can be handed to a multiprocessing pool.
    """
    result = fn(**kwargs)
    return result
# for multiprocessing with kwargs, see:
# https://stackoverflow.com/questions/45718523/pass-kwargs-to-starmap-while-using-pool-in-python
def starstarmap(pool, fn, kwargs_iter):
    """Map ``fn`` over keyword-argument dicts using a multiprocessing pool.

    Every element of ``kwargs_iter`` is paired with ``fn``; the pool then
    calls ``apply_kwargs(fn, kwargs)`` for each pair and the list returned by
    ``pool.starmap`` is passed back to the caller.
    """
    fn_kwargs_pairs = ((fn, single_kwargs) for single_kwargs in kwargs_iter)
    return pool.starmap(apply_kwargs, fn_kwargs_pairs)
def writeDictToFile(file, dictionary):
    """Write each entry of ``dictionary`` to ``file`` as one ``key:value`` line."""
    for name, entry in dictionary.items():
        # "!s" forces str() conversion, matching the "%s" placeholder semantics
        line = f"{name!s}:{entry!s}\n"
        file.write(line)
def constructHyperParamCombinations(
    E_ads_std,
    E_pair_std,
    DFT_noise,
    decay_length,
    decay_power,
    correlation_length,
    dmin,
    dmax,
    feature_threshold,
    feature_dimension,
    atom_indices,
    original_geometry_indices=None,
    prior_cov_kernel=None,
):
    """Construct a list containing all possible combinations of the given hyperparameters.

    Every argument is an iterable of candidate values for the hyperparameter
    of the same name. ``original_geometry_indices`` falls back to ``[None]``
    and ``prior_cov_kernel`` to ``["additive"]`` when omitted (or ``None``).

    Returns
    -------
    inp : dict
        Maps each hyperparameter name to the candidates that were passed in,
        in the order of the parameters in the signature.
    hyperparams_combinations : list of dict
        One dict per element of the Cartesian product of all candidate
        iterables, mapping hyperparameter name to the chosen value.
    """
    # Fresh lists per call: the returned ``inp`` exposes these objects, so a
    # shared default list could be mutated by a caller and leak into later calls.
    if original_geometry_indices is None:
        original_geometry_indices = [None]
    if prior_cov_kernel is None:
        prior_cov_kernel = ["additive"]

    # Built explicitly (instead of via locals()) so the content cannot be
    # affected by additional local variables or by frame introspection.
    inp = {
        "E_ads_std": E_ads_std,
        "E_pair_std": E_pair_std,
        "DFT_noise": DFT_noise,
        "decay_length": decay_length,
        "decay_power": decay_power,
        "correlation_length": correlation_length,
        "dmin": dmin,
        "dmax": dmax,
        "feature_threshold": feature_threshold,
        "feature_dimension": feature_dimension,
        "atom_indices": atom_indices,
        "original_geometry_indices": original_geometry_indices,
        "prior_cov_kernel": prior_cov_kernel,
    }

    hyperparam_keys = tuple(inp)
    hyperparams_combinations = [
        dict(zip(hyperparam_keys, values))
        for values in itertools.product(*inp.values())
    ]
    return inp, hyperparams_combinations
def _createLearner(
    proj,
    learner_name,
    property_key,
    unit_string="eV",
    is_gas_phase=False,
    **kwargs,
):
    """Build a DiscreteBayesianLearner with features and prior mean set.

    ``learner_name``, ``property_key``, ``unit_string`` and all extra keyword
    arguments are forwarded to ``HyperParameters``. Features data and learner
    are both created from those hyperparameters, the prior is calculated, and
    the prior mean returned by ``createNonInteractingPriorMean`` is installed
    before the learner is returned.
    """
    hyper_parameters = HyperParameters(
        name=learner_name,
        property_key=property_key,
        unit_string=unit_string,
        **kwargs,
    )
    pairwise_features = PairwiseFeaturesData.fromHyperParameters(proj, hyper_parameters)

    new_learner = DiscreteBayesianLearner.fromHyperParameters(proj, hyper_parameters)
    new_learner.setFeaturesData(pairwise_features)

    # set prior
    new_learner.calculatePrior()
    new_learner.setPriorMean(
        createNonInteractingPriorMean(
            proj,
            new_learner.features_data,
            property_key,
            is_gasphase=is_gas_phase,
        )
    )
    return new_learner
class HyperParamOptimization:
    """Class for hyperparameter optimization.

    All combinations of the hyperparameter candidates passed to
    ``setHyperParams`` are evaluated in parallel. For every combination a
    learner is created and trained, and its ``RMSE_LOOCV`` and
    ``error_max_LOOCV`` attributes are written to ``self.outfile``. The
    combination with the smallest main-learner ``RMSE_LOOCV`` is reported at
    the end of the file.
    """

    def __init__(
        self,
        proj,
        property_key,  # determines training set and learned property
        gas_phase_property_key=None,
        training_set=None,
        gas_phase_training_set=None,
        unit_string="eV",
        learner_name="hyperparam_opt",
        is_gas_phase=False,
        n_processes=-1,
    ):
        """Store the configuration of the optimization.

        Parameters
        ----------
        proj
            Project object handed to the learners; also used to look up
            training sets via ``getConfigurationSetByProperty``.
        property_key
            Property learned by the main learner. Used to fetch the training
            set when ``training_set`` is None.
        gas_phase_property_key
            When given, a gas-phase training set is stored, and (unless
            ``is_gas_phase`` is True) a gas-phase learner is trained first to
            provide part of the main learner's prior mean.
        training_set, gas_phase_training_set
            Explicit training sets; looked up from ``proj`` when None.
        unit_string, learner_name, is_gas_phase
            Forwarded to the learner construction.
        n_processes
            Number of worker processes; ``-1`` selects
            ``multiprocessing.cpu_count()``.
        """
        self.proj = proj
        self.unit_string = unit_string
        self.property_key = property_key
        self.gas_phase_property_key = gas_phase_property_key
        self.learner_name = learner_name
        self.is_gas_phase = is_gas_phase
        self.outfile = "hyperparam_opt.out"

        if n_processes == -1:
            n_processes = multiprocessing.cpu_count()
        self.n_processes = n_processes

        if training_set is None:
            self.training_set = self.proj.getConfigurationSetByProperty(self.property_key)
        else:
            self.training_set = training_set

        # Always define the attribute so that reading it can never raise
        # AttributeError; it is only filled when a gas-phase key is given.
        self.gas_phase_training_set = None
        if self.gas_phase_property_key is not None:
            if gas_phase_training_set is None:
                self.gas_phase_training_set = self.proj.getConfigurationSetByProperty(
                    self.gas_phase_property_key
                )
            else:
                self.gas_phase_training_set = gas_phase_training_set

    def _learnsGasPhasePrior(self):
        """Return True when a separate gas-phase learner is trained first.

        This is the single condition that decides whether the error list of a
        combination holds four entries (gas phase + main) or two (main only).
        """
        return self.gas_phase_property_key is not None and not self.is_gas_phase

    def setHyperParams(
        self,
        E_ads_std,
        E_pair_std,
        DFT_noise,
        decay_length,
        decay_power,
        correlation_length,
        dmin,
        dmax,
        feature_threshold,
        feature_dimension,
        atom_indices,
        original_geometry_indices=None,
        prior_cov_kernel=None,
    ):
        """Create and set all hyperparameter combinations used in the optimization.

        Every argument is an iterable of candidate values.
        ``original_geometry_indices`` defaults to ``[None]`` and
        ``prior_cov_kernel`` to ``["additive"]``. The raw input is stored in
        ``self.input`` and the combinations in ``self.hyperparam_combinations``.
        """
        # Fresh lists per call instead of shared mutable default arguments.
        if original_geometry_indices is None:
            original_geometry_indices = [None]
        if prior_cov_kernel is None:
            prior_cov_kernel = ["additive"]

        self.input, self.hyperparam_combinations = constructHyperParamCombinations(
            E_ads_std=E_ads_std,
            E_pair_std=E_pair_std,
            DFT_noise=DFT_noise,
            decay_length=decay_length,
            decay_power=decay_power,
            correlation_length=correlation_length,
            dmin=dmin,
            dmax=dmax,
            feature_threshold=feature_threshold,
            feature_dimension=feature_dimension,
            atom_indices=atom_indices,
            original_geometry_indices=original_geometry_indices,
            prior_cov_kernel=prior_cov_kernel,
        )

    def writeOptimizationInfo(self):
        """Start a new output file and write the header of the optimization.

        Raises
        ------
        RuntimeError
            If ``setHyperParams`` has not been called yet. The message is
            written to the output file before the exception is raised.
        """
        with open(self.outfile, "w") as file:
            file.write("Starting hyperparameter optimization\n")
            file.write("Printing hyperparamters given as input:\n\n")

            if not hasattr(self, "input"):
                message = "No hyperparameter input given! Please call 'setHyperParams' first."
                file.write(message + "\n")
                file.write("Exiting")
                # the exception must actually be raised, not merely constructed
                raise RuntimeError(message)

            writeDictToFile(file, self.input)
            file.write("\n")
            file.write(f"Number of hyperparameter combinations: {len(self.hyperparam_combinations)}\n")
            file.write(f"Using {self.n_processes} processes\n")
            file.write("Looping over all possible combinations of input values\n\n")

    def writeRSME(self, file, errors):
        """Write the errors of one hyperparameter combination to ``file``.

        ``errors`` is the list returned by ``getRSMEForHyperParams``: two
        entries (RMSE, maximum error) for the main learner, preceded by the
        same two entries of the gas-phase learner when one was trained.
        """
        file.write("\n")
        if self._learnsGasPhasePrior():
            file.write(f"Gas-phase RMSE_LOOCV: {errors[0]}\n")
            file.write(f"Gas-phase error_max_LOOCV: {errors[1]}\n")
            file.write(f"RSME_LOOCV: {errors[2]}\n")
            file.write(f"error_max_LOOCV: {errors[3]}\n")
        else:
            file.write(f"RSME_LOOCV: {errors[0]}\n")
            file.write(f"error_max_LOOCV: {errors[1]}\n")
        file.write("\n")

    def getRSMEForHyperParams(self, **kwargs):
        """Train the learner(s) for one hyperparameter combination.

        ``kwargs`` is one combination dict and is forwarded to the learner
        construction.

        Returns
        -------
        list
            ``[RMSE_LOOCV, error_max_LOOCV]`` of the main learner, preceded by
            the same two values of the gas-phase learner when one is trained.
        """
        errors = []
        prior_from_gas_phase = None

        # learn gas phase prior if gas_phase_property_key is specified
        if self._learnsGasPhasePrior():
            learner_gas_phase = _createLearner(
                self.proj,
                self.learner_name + "_gas_phase",
                self.gas_phase_property_key,
                unit_string=self.unit_string,
                is_gas_phase=True,
                **kwargs,
            )
            learner_gas_phase.addConfigurationSet(self.gas_phase_training_set)
            trainLearner(
                learner_gas_phase,
                self.gas_phase_training_set,
                training_set=self.gas_phase_training_set,
            )
            prior_from_gas_phase = learner_gas_phase.interactions_mean
            errors.append(learner_gas_phase.RMSE_LOOCV)
            errors.append(learner_gas_phase.error_max_LOOCV)

        # main learner
        learner = _createLearner(
            self.proj,
            self.learner_name,
            self.property_key,
            unit_string=self.unit_string,
            is_gas_phase=self.is_gas_phase,
            **kwargs,
        )
        learner.addConfigurationSet(self.training_set)

        if prior_from_gas_phase is not None:
            # entries from index n_1body onward of the prior mean are taken
            # from the gas-phase learner's interactions_mean
            n_1body = learner.features_data.n_1body
            learner.prior_mean[n_1body:] = prior_from_gas_phase[n_1body:]

        trainLearner(learner, possible_configurations=self.training_set, training_set=self.training_set)
        errors.append(learner.RMSE_LOOCV)
        errors.append(learner.error_max_LOOCV)
        return errors

    def runOptimization(self):
        """Run parallel hyperparameter optimization.

        The combinations are processed in chunks of ``n_processes``; the
        results of each chunk are appended to the output file, and the
        combination with the smallest main-learner RMSE is written at the end.

        Raises
        ------
        RuntimeError
            If ``setHyperParams`` has not been called (raised by
            ``writeOptimizationInfo``).
        """
        self.writeOptimizationInfo()

        delim_string = "##############################\n"
        n_proc = self.n_processes
        n_hyp = len(self.hyperparam_combinations)

        min_rsme = np.inf
        min_errors = None
        min_hyperparams = None

        # index of the main learner's RMSE inside the error list
        n_rmse = 2 if self._learnsGasPhasePrior() else 0

        # divide the hyperparameter combinations into chunks
        hyperparam_chunks = [
            self.hyperparam_combinations[i:i + n_proc] for i in range(0, n_hyp, n_proc)
        ]

        for chunk in hyperparam_chunks:
            # A fresh pool per chunk, as in the original design (presumably to
            # bound memory use); the context manager shuts the workers down
            # even if a worker raises.
            with multiprocessing.Pool(n_proc) as pool:
                chunk_errors = starstarmap(pool, self.getRSMEForHyperParams, chunk)

            with open(self.outfile, "a") as file:
                for hyperparams, errors in zip(chunk, chunk_errors):
                    file.write(delim_string)
                    file.write("Hyperparamters:\n")
                    writeDictToFile(file, hyperparams)
                    self.writeRSME(file, errors)

                    if errors[n_rmse] < min_rsme:
                        min_rsme = errors[n_rmse]
                        min_errors = errors
                        min_hyperparams = hyperparams
                        file.write("This is the new minimum!\n\n")

                    file.write(delim_string)

        # write final output
        with open(self.outfile, "a") as file:
            file.write("Finished iterating through all hyperparameter combinations\n\n")
            file.write("Result with best RSME:\n\n")
            file.write(delim_string)
            if min_hyperparams is None:
                # no combination at all, or no combination with a comparable
                # (non-NaN) RMSE: nothing to report
                file.write("No hyperparameter combination produced a valid RSME.\n")
            else:
                file.write("Hyperparamters:\n")
                writeDictToFile(file, min_hyperparams)
                self.writeRSME(file, min_errors)
            file.write(delim_string)
            file.write("\n")
            file.write("Have a nice day.\n")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment