Source code for synergy.single.hill

#    Copyright (C) 2020 David J. Wooten
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

from typing import Dict, List, Tuple

import numpy as np
from scipy.stats import linregress

from synergy.exceptions import ModelNotParameterizedError
from synergy.single.dose_response_model_1d import ParametricDoseResponseModel1D


[docs]class Hill(ParametricDoseResponseModel1D): """The four-parameter Hill equation E = E0 + (Emax - E0) * d^h / (C^h + d^h) The Hill equation is a standard model for single-drug dose-response curves. This is the base model for Hill_2P and Hill_CI. """ def __init__(self, **kwargs): """Ctor.""" # To minimize risk of overflow or floating-point precision issues, we linearly scale # doses passed into fit to be centered around 0 on a log scale. # This variable stores that scale and is used to reverse it when fitting C. self._dose_scale: float = 1.0 self.fit_function = self._model_to_fit self.jacobian_function = self._model_jacobian_for_fit super().__init__(**kwargs)
[docs] def E(self, d): if not self.is_specified: raise ModelNotParameterizedError("Model mustbe specified before calling E().") return self._model(d, self.E0, self.Emax, self.h, self.C)
[docs] def E_inv(self, E): if not self.is_specified: raise ModelNotParameterizedError("Model mustbe specified before calling E().") return self._model_inv(E, self.E0, self.Emax, self.h, self.C)
@property def _parameter_names(self) -> List[str]: return ["E0", "Emax", "h", "C"] @property def _default_fit_bounds(self) -> Dict[str, Tuple[float, float]]: return {"h": (0.0, np.inf), "C": (0.0, np.inf)} def _set_dose_scale(self, d): """Find the scaling factor that will normalize the dose scale to be log-centered around 0. We want to find s such that d' = d / s has mean(log(d')) = 0 We will then use d' when fitting the data. Importantly, this will mean we are fitting C' = C / s, not just C. Consequently we must update C_bounds to C'_bounds. """ C_idx = self._parameter_names.index("C") # Revert old C'_bounds back to C_bounds if C_idx >= 0: self._bounds[0][C_idx] += np.log(self._dose_scale) self._bounds[1][C_idx] += np.log(self._dose_scale) # Calculate dose scale self._dose_scale = np.exp(np.median(np.log(d[d > 0]))) # Update C_bounds to C'_bounds if C_idx >= 0: self._bounds[0][C_idx] -= np.log(self._dose_scale) self._bounds[1][C_idx] -= np.log(self._dose_scale)
[docs] def fit(self, d, E, use_jacobian=True, bootstrap_iterations=0, **kwargs): self._set_dose_scale(d) super().fit( d / self._dose_scale, E, use_jacobian=use_jacobian, bootstrap_iterations=bootstrap_iterations, **kwargs )
def _set_parameters(self, parameters): self.E0, self.Emax, self.h, self.C = parameters def _model(self, d, E0, Emax, h, C): """Hill equation.""" dh = np.float_power(d, h) return E0 + (Emax - E0) * (dh / (C**h + dh)) def _model_to_fit(self, d, E0, Emax, logh, logC): """Hill equation expecting log-transformed parameters h and C parameters, for fitting.""" return self._model(d, E0, Emax, np.exp(logh), np.exp(logC)) def _model_inv(self, E, E0, Emax, h, C): """Inverse Hill equation.""" E_ratio = (E - E0) / (Emax - E) d = np.float_power(E_ratio, 1.0 / h) * C # For any E's outside of the range E0 to Emax, E_inv should be nan if hasattr(E, "__iter__"): d[E_ratio < 0] = np.nan return d # Dose cannot be negative elif d < 0: return np.nan return d def _model_jacobian_for_fit(self, d, E0, Emax, logh, logC): """Hill equation jacobian, expecting log-transformed h and C, for fitting. Return ------ jacobian : array_like Derivatives of the Hill equation with respect to E0, Emax, logh, and logC """ h = np.exp(logh) d_pow_h = d**h C_pow_h = np.exp(logC) ** h logd = np.log(d) jE0 = 1 - d_pow_h / (C_pow_h + d_pow_h) jEmax = 1 - jE0 jC = (E0 - Emax) * d_pow_h * h * C_pow_h / ((C_pow_h + d_pow_h) * (C_pow_h + d_pow_h)) jh = ( (Emax - E0) * d_pow_h * h * ((C_pow_h + d_pow_h) * logd - (logC * C_pow_h + logd * d_pow_h)) / ((C_pow_h + d_pow_h) * (C_pow_h + d_pow_h)) ) jac = np.hstack((jE0.reshape(-1, 1), jEmax.reshape(-1, 1), jh.reshape(-1, 1), jC.reshape(-1, 1))) jac[np.isnan(jac)] = 0 return jac def _get_initial_guess(self, d, E, p0): """Default initial guess is E0=E(dmin), Emax=E(dmax), h=1, C=median(d)""" if p0 is None: p0 = [np.median(E[d == min(d)]), np.median(E[d == max(d)]), 1, np.median(d) * self._dose_scale] return super()._get_initial_guess(d, E, p0) def _transform_params_from_fit(self, params): """Exponentiate h and C to get the final parameters.""" return params[0], params[1], np.exp(params[2]), np.exp(params[3]) * self._dose_scale def _transform_params_to_fit(self, params): """Log-transform h and C for fitting.""" with np.errstate(divide="ignore"): return params[0], params[1], np.log(params[2]), np.log(params[3] / self._dose_scale) def __repr__(self): if not self.is_specified: return "Hill()" return "Hill(E0=%0.3g, Emax=%0.3g, h=%0.3g, C=%0.3g)" % (self.E0, self.Emax, self.h, self.C)
[docs]class Hill_2P(Hill): """The two-parameter Hill equation E = E0 + (Emax - E0) * d^h / (C^h + d^h) Mathematically equivalent to the four-parameter Hill equation, but E0 and Emax are held constant (not fit to data). """ def __init__(self, E0=1.0, Emax=0.0, **kwargs): self.E0 = E0 self.Emax = Emax super().__init__(**kwargs) def _model_to_fit(self, d, logh, logC): return self._model(d, self.E0, self.Emax, np.exp(logh), np.exp(logC)) def _model_jacobian_for_fit(self, d, logh, logC): h = np.exp(logh) d_pow_h = d**h C_pow_h = np.exp(logC) ** h squared_sum = np.float_power(C_pow_h + d_pow_h, 2.0) logd = np.log(d) E0 = self.E0 Emax = self.Emax jC = (E0 - Emax) * (d_pow_h * h * C_pow_h / squared_sum) jh = (Emax - E0) * d_pow_h * h * ((C_pow_h + d_pow_h) * logd - (logC * C_pow_h + logd * d_pow_h)) / squared_sum jac = np.hstack((jh.reshape(-1, 1), jC.reshape(-1, 1))) jac[np.isnan(jac)] = 0 return jac @property def _parameter_names(self) -> List[str]: return ["h", "C"] def _get_initial_guess(self, d, E, p0): if p0 is None: p0 = [1, np.median(d)] return super()._get_initial_guess(d, E, p0) def _set_parameters(self, popt): h, C = popt self.h = h self.C = C def _transform_params_from_fit(self, params): return np.exp(params[0]), np.exp(params[1]) * self._dose_scale def _transform_params_to_fit(self, params): with np.errstate(divide="ignore"): return np.log(params[0]), np.log(params[1] / self._dose_scale) def __repr__(self): if not self.is_specified: return "Hill_2P()" return "Hill_2P(E0=%0.3g, Emax=%0.3g, h=%0.3g, C=%0.3g)" % ( self.E0, self.Emax, self.h, self.C, )
[docs]class Hill_CI(Hill_2P): """Model used to calculate Combination Index synergy. Mathematically this equivalent two-parameter Hill equation with E0=1 and Emax=0. However, Hill_CI.fit() uses a log-linearization approach to dose-response fitting used by the Combination Index. """ def __init__(self, **kwargs): kwargs["E0"] = 1.0 kwargs["Emax"] = 0.0 super().__init__(**kwargs) def _fit(self, d, E, use_jacobian, **kwargs): """Override the parent function to use linregress() instead of curve_fit()""" mask = np.where((E < 1) & (E > 0) & (d > 0)) E = E[mask] d = d[mask] fU = E fA = 1 - E median_effect_line = linregress(np.log(d), np.log(fA / fU)) h = median_effect_line.slope C = np.exp(-median_effect_line.intercept / h) C *= self._dose_scale return (h, C) def plot_linear_fit(self, d, E, ax=None): if not self.is_specified: raise ModelNotParameterizedError() try: from matplotlib import pyplot as plt except ImportError: # TODO: Error # TODO: Move this whole function to utils.plot return mask = np.where((E < 1) & (E > 0) & (d > 0)) E = E[mask] d = d[mask] fU = E fA = 1 - E ax_created = False if ax is None: ax_created = True fig = plt.figure(figsize=(4, 4)) ax = fig.add_subplot(111) ax.scatter(np.log(d), np.log(fA / fU)) ax.plot(np.log(d), np.log(d) * self.h - self.h * np.log(self.C)) # Draw bootstraps with low opacity # for i in range(self.bootstrap_parameters.shape[0]): # h, C = self.bootstrap_parameters[i, :] # ax.plot(np.log(d), np.log(d) * h - h * np.log(C), c="k", alpha=0.1, lw=0.5) ax.set_ylabel("h * log(d) - h * log(C)") ax.set_xlabel("log(d)") ax.set_title("Combination Index linearization") if ax_created: plt.tight_layout() plt.show() def _bootstrap_resample(self, d, E, use_jacobian, bootstrap_iterations, **kwargs): """Bootstrap resampling is not yet implemented for CI""" def __repr__(self): if not self.is_specified: return "Hill_CI()" return "Hill_CI(h=%0.3g, C=%0.3g)" % (self.h, self.C)