diff --git a/cobra/model_building/forward_selection.py b/cobra/model_building/forward_selection.py index 29e06b3..50961f9 100644 --- a/cobra/model_building/forward_selection.py +++ b/cobra/model_building/forward_selection.py @@ -31,6 +31,12 @@ class ForwardFeatureSelection: selection. pos_only : bool Whether or not the model coefficients should all be positive (no sign flips). + model_kwargs: dict, optional + An optional dictionary of hyperparameters and their values to + override the default hyperparameters that Cobra uses when + constructing the model during forward selection. + For more info, see the kwargs documentation of the model class + that is used (e.g. LinearRegressionModel). self._fitted_models : list List of fitted models. """ @@ -38,7 +44,8 @@ class ForwardFeatureSelection: def __init__(self, model_type: str="classification", max_predictors: int=50, - pos_only: bool=True): + pos_only: bool=True, + model_kwargs: Optional[dict]=None): self.model_type = model_type if model_type == "classification": @@ -49,6 +56,8 @@ def __init__(self, self.max_predictors = max_predictors self.pos_only = pos_only + self.model_kwargs = model_kwargs + self._fitted_models = [] def get_model_from_step(self, step: int): @@ -347,7 +356,10 @@ def _train_model(self, train_data: pd.DataFrame, target_column_name: str, self.MLModel Trained model. """ - model = self.MLModel() + if self.model_kwargs is None: + model = self.MLModel() + else: + model = self.MLModel(**self.model_kwargs) model.fit(train_data[predictors], train_data[target_column_name]) diff --git a/cobra/model_building/models.py b/cobra/model_building/models.py index 3a921c0..1977b54 100644 --- a/cobra/model_building/models.py +++ b/cobra/model_building/models.py @@ -25,14 +25,29 @@ class LogisticRegressionModel: scikit-learn logistic regression model. predictors : list List of predictors used in the model. 
+ kwargs: dict, optional + Pass a dictionary here (optional!), to override Cobra's default + choice of hyperparameter values for the scikit-learn + LogisticRegression model that is used behind the scenes. + Cobra's defaults are: fit_intercept=True, C=1e9, solver='liblinear', + random_state=42. + See scikit-learn's documentation of the possible hyperparameters and + values that can be set: + https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html """ - def __init__(self): - self.logit = LogisticRegression(fit_intercept=True, C=1e9, - solver='liblinear', random_state=42) + def __init__(self, **kwargs): + # Initialize a scikit-learn logistic regression model, + # with custom arguments passed by the data scientist (if any), + # supplemented with Cobra's default arguments, if a custom value was + # not provided by the data scientist for overriding purposes: + model_kwargs = dict(fit_intercept=True, C=1e9, solver='liblinear', + random_state=42) + model_kwargs.update(kwargs) + self.logit = LogisticRegression(**model_kwargs) + self._is_fitted = False - # placeholder to keep track of a list of predictors - self.predictors = [] + self.predictors = [] # placeholder to keep track of a list of predictors self._eval_metrics_by_split = {} def serialize(self) -> dict: @@ -104,7 +119,12 @@ def get_intercept(self) -> float: float Intercept of the model. """ - return self.logit.intercept_[0] + if self.logit.fit_intercept: + return self.logit.intercept_[0] + else: + raise ValueError("An intercept cannot be returned: this " + "LogisticRegressionModel was created with " + "the hyperparameter fit_intercept set to False.") def get_coef_by_predictor(self) -> dict: """Returns a dictionary mapping predictor (key) to coefficient (value). @@ -258,10 +278,26 @@ class LinearRegressionModel: scikit-learn linear regression model. predictors : list List of predictors used in the model. 
+ kwargs: dict, optional + Pass a dictionary here (optional!), to override Cobra's default + choice of hyperparameter values for the scikit-learn + LinearRegression model that is used behind the scenes. + Cobra's only default setting is fit_intercept=True, but there are + other hyperparameters that can be set too. + See scikit-learn's documentation of the possible hyperparameters and + values that can be set: + https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html """ - def __init__(self): - self.linear = LinearRegression(fit_intercept=True) + def __init__(self, **kwargs): + # Initialize a scikit-learn linear regression model, + # with custom arguments passed by the data scientist (if any), + # supplemented with Cobra's default arguments, if a custom value was + # not provided by the data scientist for overriding purposes: + model_kwargs = dict(fit_intercept=True) + model_kwargs.update(kwargs) + self.linear = LinearRegression(**model_kwargs) + self._is_fitted = False self.predictors = [] # placeholder to keep track of a list of predictors self._eval_metrics_by_split = {}