Source code for synergy.single.hill

#    Copyright (C) 2020 David J. Wooten
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

from typing import Dict, List, Tuple

import numpy as np
from scipy.stats import linregress

from synergy.exceptions import ModelNotParameterizedError
from synergy.single.dose_response_model_1d import ParametricDoseResponseModel1D


[docs]class Hill(ParametricDoseResponseModel1D):
    """The four-parameter Hill equation

    E = E0 + (Emax - E0) * d^h / (C^h + d^h)

    The Hill equation is a standard model for single-drug dose-response curves.
    This is the base model for Hill_2P and Hill_CI.
    """

    def __init__(self, **kwargs):
        """Ctor."""
        # To minimize risk of overflow or floating-point precision issues, we linearly scale
        # doses passed into fit to be centered around 0 on a log scale.
        # This variable stores that scale and is used to reverse it when fitting C.
        self._dose_scale: float = 1.0

        self.fit_function = self._model_to_fit
        self.jacobian_function = self._model_jacobian_for_fit

        super().__init__(**kwargs)

[docs]    def E(self, d):
        if not self.is_specified:
            raise ModelNotParameterizedError("Model mustbe specified before calling E().")

        return self._model(d, self.E0, self.Emax, self.h, self.C)

[docs]    def E_inv(self, E):
        if not self.is_specified:
            raise ModelNotParameterizedError("Model mustbe specified before calling E().")

        return self._model_inv(E, self.E0, self.Emax, self.h, self.C)

    @property
    def _parameter_names(self) -> List[str]:
        return ["E0", "Emax", "h", "C"]

    @property
    def _default_fit_bounds(self) -> Dict[str, Tuple[float, float]]:
        return {"h": (0.0, np.inf), "C": (0.0, np.inf)}

    def _set_dose_scale(self, d):
        """Find the scaling factor that will normalize the dose scale to be log-centered around 0.

        We want to find s such that d' = d / s has
        mean(log(d')) = 0

        We will then use d' when fitting the data.
        Importantly, this will mean we are fitting C' = C / s, not just C.
        Consequently we must update C_bounds to C'_bounds.
        """
        C_idx = self._parameter_names.index("C")
        # Revert old C'_bounds back to C_bounds
        if C_idx >= 0:
            self._bounds[0][C_idx] += np.log(self._dose_scale)
            self._bounds[1][C_idx] += np.log(self._dose_scale)

        # Calculate dose scale
        self._dose_scale = np.exp(np.median(np.log(d[d > 0])))

        # Update C_bounds to C'_bounds
        if C_idx >= 0:
            self._bounds[0][C_idx] -= np.log(self._dose_scale)
            self._bounds[1][C_idx] -= np.log(self._dose_scale)

[docs]    def fit(self, d, E, use_jacobian=True, bootstrap_iterations=0, **kwargs):
        self._set_dose_scale(d)
        super().fit(
            d / self._dose_scale, E, use_jacobian=use_jacobian, bootstrap_iterations=bootstrap_iterations, **kwargs
        )

    def _set_parameters(self, parameters):
        self.E0, self.Emax, self.h, self.C = parameters

    def _model(self, d, E0, Emax, h, C):
        """Hill equation."""
        dh = np.float_power(d, h)
        return E0 + (Emax - E0) * (dh / (C**h + dh))

    def _model_to_fit(self, d, E0, Emax, logh, logC):
        """Hill equation expecting log-transformed parameters h and C parameters, for fitting."""
        return self._model(d, E0, Emax, np.exp(logh), np.exp(logC))

    def _model_inv(self, E, E0, Emax, h, C):
        """Inverse Hill equation."""
        E_ratio = (E - E0) / (Emax - E)
        d = np.float_power(E_ratio, 1.0 / h) * C

        # For any E's outside of the range E0 to Emax, E_inv should be nan
        if hasattr(E, "__iter__"):
            d[E_ratio < 0] = np.nan
            return d

        # Dose cannot be negative
        elif d < 0:
            return np.nan

        return d

    def _model_jacobian_for_fit(self, d, E0, Emax, logh, logC):
        """Hill equation jacobian, expecting log-transformed h and C, for fitting.

        Return
        ------
        jacobian : array_like
            Derivatives of the Hill equation with respect to E0, Emax, logh,
            and logC
        """
        h = np.exp(logh)
        d_pow_h = d**h
        C_pow_h = np.exp(logC) ** h
        logd = np.log(d)

        jE0 = 1 - d_pow_h / (C_pow_h + d_pow_h)
        jEmax = 1 - jE0

        jC = (E0 - Emax) * d_pow_h * h * C_pow_h / ((C_pow_h + d_pow_h) * (C_pow_h + d_pow_h))

        jh = (
            (Emax - E0)
            * d_pow_h
            * h
            * ((C_pow_h + d_pow_h) * logd - (logC * C_pow_h + logd * d_pow_h))
            / ((C_pow_h + d_pow_h) * (C_pow_h + d_pow_h))
        )

        jac = np.hstack((jE0.reshape(-1, 1), jEmax.reshape(-1, 1), jh.reshape(-1, 1), jC.reshape(-1, 1)))
        jac[np.isnan(jac)] = 0
        return jac

    def _get_initial_guess(self, d, E, p0):
        """Default initial guess is E0=E(dmin), Emax=E(dmax), h=1, C=median(d)"""
        if p0 is None:
            p0 = [np.median(E[d == min(d)]), np.median(E[d == max(d)]), 1, np.median(d) * self._dose_scale]

        return super()._get_initial_guess(d, E, p0)

    def _transform_params_from_fit(self, params):
        """Exponentiate h and C to get the final parameters."""
        return params[0], params[1], np.exp(params[2]), np.exp(params[3]) * self._dose_scale

    def _transform_params_to_fit(self, params):
        """Log-transform h and C for fitting."""
        with np.errstate(divide="ignore"):
            return params[0], params[1], np.log(params[2]), np.log(params[3] / self._dose_scale)

    def __repr__(self):
        if not self.is_specified:
            return "Hill()"

        return "Hill(E0=%0.3g, Emax=%0.3g, h=%0.3g, C=%0.3g)" % (self.E0, self.Emax, self.h, self.C)


[docs]class Hill_2P(Hill):
    """The two-parameter Hill equation

    E = E0 + (Emax - E0) * d^h / (C^h + d^h)

    Mathematically equivalent to the four-parameter Hill equation, but E0 and Emax are held constant (not fit to data).
    """

    def __init__(self, E0=1.0, Emax=0.0, **kwargs):
        self.E0 = E0
        self.Emax = Emax
        super().__init__(**kwargs)

    def _model_to_fit(self, d, logh, logC):
        return self._model(d, self.E0, self.Emax, np.exp(logh), np.exp(logC))

    def _model_jacobian_for_fit(self, d, logh, logC):
        h = np.exp(logh)
        d_pow_h = d**h
        C_pow_h = np.exp(logC) ** h
        squared_sum = np.float_power(C_pow_h + d_pow_h, 2.0)

        logd = np.log(d)

        E0 = self.E0
        Emax = self.Emax

        jC = (E0 - Emax) * (d_pow_h * h * C_pow_h / squared_sum)

        jh = (Emax - E0) * d_pow_h * h * ((C_pow_h + d_pow_h) * logd - (logC * C_pow_h + logd * d_pow_h)) / squared_sum

        jac = np.hstack((jh.reshape(-1, 1), jC.reshape(-1, 1)))
        jac[np.isnan(jac)] = 0
        return jac

    @property
    def _parameter_names(self) -> List[str]:
        return ["h", "C"]

    def _get_initial_guess(self, d, E, p0):
        if p0 is None:
            p0 = [1, np.median(d)]

        return super()._get_initial_guess(d, E, p0)

    def _set_parameters(self, popt):
        h, C = popt

        self.h = h
        self.C = C

    def _transform_params_from_fit(self, params):
        return np.exp(params[0]), np.exp(params[1]) * self._dose_scale

    def _transform_params_to_fit(self, params):
        with np.errstate(divide="ignore"):
            return np.log(params[0]), np.log(params[1] / self._dose_scale)

    def __repr__(self):
        if not self.is_specified:
            return "Hill_2P()"

        return "Hill_2P(E0=%0.3g, Emax=%0.3g, h=%0.3g, C=%0.3g)" % (
            self.E0,
            self.Emax,
            self.h,
            self.C,
        )


[docs]class Hill_CI(Hill_2P):
    """Model used to calculate Combination Index synergy.

    Mathematically this equivalent two-parameter Hill equation with E0=1 and Emax=0. However, Hill_CI.fit() uses a
    log-linearization approach to dose-response fitting used by the Combination Index.
    """

    def __init__(self, **kwargs):
        kwargs["E0"] = 1.0
        kwargs["Emax"] = 0.0
        super().__init__(**kwargs)

    def _fit(self, d, E, use_jacobian, **kwargs):
        """Override the parent function to use linregress() instead of curve_fit()"""
        mask = np.where((E < 1) & (E > 0) & (d > 0))
        E = E[mask]
        d = d[mask]
        fU = E
        fA = 1 - E

        median_effect_line = linregress(np.log(d), np.log(fA / fU))
        h = median_effect_line.slope
        C = np.exp(-median_effect_line.intercept / h)
        C *= self._dose_scale
        return (h, C)

    def plot_linear_fit(self, d, E, ax=None):
        if not self.is_specified:
            raise ModelNotParameterizedError()

        try:
            from matplotlib import pyplot as plt
        except ImportError:
            # TODO: Error
            # TODO: Move this whole function to utils.plot
            return
        mask = np.where((E < 1) & (E > 0) & (d > 0))
        E = E[mask]
        d = d[mask]
        fU = E
        fA = 1 - E

        ax_created = False
        if ax is None:
            ax_created = True
            fig = plt.figure(figsize=(4, 4))
            ax = fig.add_subplot(111)

        ax.scatter(np.log(d), np.log(fA / fU))
        ax.plot(np.log(d), np.log(d) * self.h - self.h * np.log(self.C))

        # Draw bootstraps with low opacity
        # for i in range(self.bootstrap_parameters.shape[0]):
        #     h, C = self.bootstrap_parameters[i, :]
        #     ax.plot(np.log(d), np.log(d) * h - h * np.log(C), c="k", alpha=0.1, lw=0.5)

        ax.set_ylabel("h * log(d) - h * log(C)")
        ax.set_xlabel("log(d)")
        ax.set_title("Combination Index linearization")
        if ax_created:
            plt.tight_layout()
            plt.show()

    def _bootstrap_resample(self, d, E, use_jacobian, bootstrap_iterations, **kwargs):
        """Bootstrap resampling is not yet implemented for CI"""

    def __repr__(self):
        if not self.is_specified:
            return "Hill_CI()"

        return "Hill_CI(h=%0.3g, C=%0.3g)" % (self.h, self.C)