Source code for libuplift.meta.base
"""Base classe for uplift meta models."""
import numpy as np
from sklearn.base import clone
from sklearn.utils import check_X_y, check_consistent_length
from sklearn.utils.metaestimators import _BaseComposition
from ..utils import check_trt
[docs]
class UpliftMetaModelBase(_BaseComposition):
    """Base class for uplift meta estimators.

    Checks input consistency and builds classifiers/regressors on
    subsets of the data.

    Derived classes need to override the ``_get_model_names_list`` and
    ``_iter_training_subsets`` methods.  The ``predict`` method needs
    to be implemented as well.

    Parameters
    ----------
    base_estimator : estimator or list of (name, estimator) pairs
        Either a single object exposing ``fit`` (cloned once per
        constituent model) or an explicit list of ``(name, model)``
        pairs matching the names from ``_get_model_names_list``.
    """

    def __init__(self, base_estimator):
        self.base_estimator = base_estimator

    def _get_model_names_list(self, X=None, y=None, trt=None):
        """Return a list of names of constituent
        classification/regression models.

        This method should be overridden such that the number of
        models can be determined by _check_base_estimator.  If model
        name starts with '_' None will be put in model list instead of
        a real model and _ removed from name (useful to keep the list
        of given size even if some models are not used).
        """
        raise NotImplementedError()

    def _iter_training_subsets(self, X, y, trt, n_trt, sample_weight):
        """Return training sets for all models in the meta model.

        Each iteration returns a triple of predictor matrix, target
        vector, and sample weights (possibly None).  While iterating
        i-th subset it may be assumed that models on previous subsets
        have been fitted.
        """
        raise NotImplementedError()

    @staticmethod
    def _model_entry(m_name, estimator):
        """Build one ``(name, model)`` entry of the model list.

        Names starting with '_' denote unused slots: the underscore is
        stripped and ``None`` is stored instead of a model.  Otherwise
        a fresh clone of ``estimator`` is stored under ``m_name``.
        """
        if m_name.startswith("_"):
            return (m_name[1:], None)
        return (m_name, clone(estimator))

    def _check_base_estimator(self, model_names):
        """Build the list of ``(name, model)`` pairs to be fitted.

        Three configurations are supported:

        * ``model_names`` is a non-empty sequence whose first element
          is not a string: it is treated as a sequence of
          ``(name, estimator)`` pairs and each estimator is cloned.
        * ``self.base_estimator`` has a ``fit`` attribute: it is
          cloned once for every name in ``model_names``.
        * otherwise ``self.base_estimator`` is treated as a user
          supplied list of ``(name, model)`` pairs which must match
          ``model_names`` in length and names.

        In every configuration a name starting with '_' yields a
        ``None`` placeholder stored under the name without the
        underscore.

        Raises
        ------
        RuntimeError
            If a user supplied list does not match ``model_names``
            (length, names, or ``None``/non-``None`` models).
        """
        if len(model_names) > 0 and not isinstance(model_names[0], str):
            # full model list is provided by _get_model_names_list
            return [self._model_entry(m_name, est)
                    for m_name, est in model_names]

        if hasattr(self.base_estimator, "fit"):
            # a single prototype estimator, cloned for every model
            return [self._model_entry(m_name, self.base_estimator)
                    for m_name in model_names]

        # full model list is provided by user, check length and names
        user_models = list(self.base_estimator)
        if len(user_models) != len(model_names):
            # zip() below would silently drop the surplus entries
            raise RuntimeError(
                f"Expected {len(model_names)} models, got {len(user_models)}"
            )
        estimator_list = []
        for m_name, (user_m_name, model) in zip(model_names, user_models):
            if m_name != user_m_name:
                raise RuntimeError(f"Expected model name {m_name}, got {user_m_name}")
            if m_name.startswith("_") and model is not None:
                raise RuntimeError(f"Model name {m_name} starts with '_' but the model is not None")
            if not m_name.startswith("_") and model is None:
                raise RuntimeError(f"Model name {m_name} does not start with '_' but the model is None")
            # Placeholders must not go through clone(): cloning None
            # raises TypeError.  _model_entry also strips the '_' so
            # the result matches the other two configurations.
            estimator_list.append(self._model_entry(m_name, model))
        return estimator_list

    def fit(self, X, y, trt, n_trt=None, sample_weight=None, **kwargs):
        """Fit all constituent models on their training subsets.

        Parameters
        ----------
        X, y
            Predictors and target; validated with ``check_X_y``
            (CSR sparse matrices are accepted for ``X``).
        trt
            Treatment indicator, validated by ``check_trt``.
        n_trt : optional
            Passed to ``check_trt`` together with ``trt``
            (presumably the number of treatments -- confirm against
            ``check_trt``).
        sample_weight : optional
            Sample weights handed to ``_iter_training_subsets``.
        **kwargs
            Forwarded to ``_get_model_names_list``.

        Returns
        -------
        self

        Notes
        -----
        Sets ``models_`` (list of ``(name, model)`` pairs),
        ``n_models_`` and ``n_`` (per-model training set size, or sum
        of weights when weights are given).
        """
        X, y = check_X_y(X, y, accept_sparse="csr")
        trt, n_trt = check_trt(trt, n_trt)
        # None entries are ignored by check_consistent_length, so the
        # weights are only checked when they are actually supplied.
        check_consistent_length(X, y, trt, sample_weight)
        # _set_fit_params is not defined in this class; it is expected
        # to be provided elsewhere in the class hierarchy.
        self._set_fit_params(y, trt, n_trt)

        model_names = self._get_model_names_list(X, y, trt, **kwargs)
        self.models_ = self._check_base_estimator(model_names)
        self.n_models_ = len(self.models_)
        self.n_ = np.zeros(self.n_models_, dtype=int)

        subsets = self._iter_training_subsets(X, y, trt, n_trt, sample_weight)
        for i, (X_i, y_i, w_i) in enumerate(subsets):
            _, m_i = self.models_[i]
            if m_i is None:
                # placeholder slot: nothing to fit
                continue
            if w_i is None:
                self.n_[i] = X_i.shape[0]
                m_i.fit(X_i, y_i)
            else:
                # NOTE: n_ is an integer array, so a fractional sum of
                # weights is truncated on assignment.
                self.n_[i] = w_i.sum()
                m_i.fit(X_i, y_i, sample_weight=w_i)
        return self

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.
        """
        if hasattr(self.base_estimator, "fit"):
            # single estimator: the standard behaviour is sufficient
            return super().get_params(deep=deep)
        # list of (name, model) pairs: use the composition helper
        return self._get_params('base_estimator', deep=deep)

    def set_params(self, **kwargs):
        """Set the parameters of this estimator.

        Valid parameter keys can be listed with ``get_params()``.

        Returns
        -------
        self
        """
        if hasattr(self.base_estimator, "fit"):
            return super().set_params(**kwargs)
        self._set_params('base_estimator', **kwargs)
        return self