Source code for arviz.plots.hdiplot

"""Plot highest density intervals for regression data."""
import warnings

import numpy as np
from scipy.interpolate import griddata
from scipy.signal import savgol_filter
from xarray import Dataset

from ..rcparams import rcParams
from ..stats import hdi
from .plot_utils import get_plotting_function

[docs]def plot_hdi( x, y=None, hdi_prob=None, hdi_data=None, color="C1", circular=False, smooth=True, smooth_kwargs=None, figsize=None, fill_kwargs=None, plot_kwargs=None, hdi_kwargs=None, ax=None, backend=None, backend_kwargs=None, show=None, ): r""" Plot HDI intervals for regression data. Parameters ---------- x : array-like Values to plot. y : array-like, optional Values from which to compute the HDI. Assumed shape ``(chain, draw, \*shape)``. Only optional if ``hdi_data`` is present. hdi_data : array_like, optional Precomputed HDI values to use. Assumed shape is ``(*x.shape, 2)``. hdi_prob : float, optional Probability for the highest density interval. Defaults to ``stats.hdi_prob`` rcParam. color : str, optional Color used for the limits of the HDI and fill. Should be a valid matplotlib color. circular : bool, optional Whether to compute the HDI taking into account ``x`` is a circular variable (in the range [-np.pi, np.pi]) or not. Defaults to False (i.e non-circular variables). smooth : boolean, optional If True the result will be smoothed by first computing a linear interpolation of the data over a regular grid and then applying the Savitzky-Golay filter to the interpolated data. Defaults to True. smooth_kwargs : dict, optional Additional keywords modifying the Savitzky-Golay filter. See :func:`scipy:scipy.signal.savgol_filter` for details. figsize : tuple Figure size. If None it will be defined automatically. fill_kwargs : dict, optional Keywords passed to :meth:`mpl:matplotlib.axes.Axes.fill_between` (use ``fill_kwargs={'alpha': 0}`` to disable fill) or to :meth:`bokeh.plotting.Figure.patch`. plot_kwargs : dict, optional HDI limits keyword arguments, passed to :meth:`mpl:matplotlib.axes.Axes.plot` or :meth:`bokeh.plotting.Figure.patch`. hdi_kwargs : dict, optional Keyword arguments passed to :func:`~arviz.hdi`. Ignored if ``hdi_data`` is present. ax : axes, optional Matplotlib axes or bokeh figures. backend : {"matplotlib","bokeh"}, optional Select plotting backend. backend_kwargs : bool, optional These are kwargs specific to the backend being used, passed to :meth:`mpl:matplotlib.axes.Axes.plot` or :meth:`bokeh.plotting.Figure.patch`. show : bool, optional Call backend show function. Returns ------- axes : matplotlib axes or bokeh figures See Also -------- hdi : Calculate highest density interval (HDI) of array for given probability. Examples -------- Plot HDI interval of simulated regression data using `y` argument: .. plot:: :context: close-figs >>> import numpy as np >>> import arviz as az >>> x_data = np.random.normal(0, 1, 100) >>> y_data = np.random.normal(2 + x_data * 0.5, 0.5, size=(2, 50, 100)) >>> az.plot_hdi(x_data, y_data) ``plot_hdi`` can also be given precalculated values with the argument ``hdi_data``. This example shows how to use :func:`~arviz.hdi` to precalculate the values and pass these values to ``plot_hdi``. Similarly to an example in ``hdi`` we are using the ``input_core_dims`` argument of :func:`~arviz.wrap_xarray_ufunc` to manually define the dimensions over which to calculate the HDI. .. plot:: :context: close-figs >>> hdi_data = az.hdi(y_data, input_core_dims=[["draw"]]) >>> ax = az.plot_hdi(x_data, hdi_data=hdi_data[0], color="r", fill_kwargs={"alpha": .2}) >>> az.plot_hdi(x_data, hdi_data=hdi_data[1], color="k", ax=ax, fill_kwargs={"alpha": .2}) ``plot_hdi`` can also be used with Inference Data objects. Here we use the posterior predictive to plot the HDI interval. .. plot:: :context: close-figs >>> X = np.random.normal(0,1,100) >>> Y = np.random.normal(2 + X * 0.5, 0.5, size=(2,10,100)) >>> idata = az.from_dict(posterior={"y": Y}, constant_data={"x":X}) >>> x_data = idata.constant_data.x >>> y_data = idata.posterior.y >>> az.plot_hdi(x_data, y_data) """ if hdi_kwargs is None: hdi_kwargs = {} x = np.asarray(x) x_shape = x.shape if y is None and hdi_data is None: raise ValueError("One of {y, hdi_data} is required") if hdi_data is not None and y is not None: warnings.warn("Both y and hdi_data arguments present, ignoring y") elif hdi_data is not None: hdi_prob = ( hdi_data.hdi.attrs.get("hdi_prob", np.nan) if hasattr(hdi_data, "hdi") else np.nan ) if isinstance(hdi_data, Dataset): data_vars = list(hdi_data.data_vars) if len(data_vars) != 1: raise ValueError( "Found several variables in hdi_data. Only single variable Datasets are " "supported." ) hdi_data = hdi_data[data_vars[0]] else: y = np.asarray(y) if hdi_prob is None: hdi_prob = rcParams["stats.hdi_prob"] else: if not 1 >= hdi_prob > 0: raise ValueError("The value of hdi_prob should be in the interval (0, 1]") hdi_data = hdi(y, hdi_prob=hdi_prob, circular=circular, multimodal=False, **hdi_kwargs) hdi_shape = hdi_data.shape if hdi_shape[:-1] != x_shape: msg = ( "Dimension mismatch for x: {} and hdi: {}. Check the dimensions of y and" "hdi_kwargs to make sure they are compatible" ) raise TypeError(msg.format(x_shape, hdi_shape)) if smooth: if smooth_kwargs is None: smooth_kwargs = {} smooth_kwargs.setdefault("window_length", 55) smooth_kwargs.setdefault("polyorder", 2) x_data = np.linspace(x.min(), x.max(), 200) x_data[0] = (x_data[0] + x_data[1]) / 2 hdi_interp = griddata(x, hdi_data, x_data) y_data = savgol_filter(hdi_interp, axis=0, **smooth_kwargs) else: idx = np.argsort(x) x_data = x[idx] y_data = hdi_data[idx] hdiplot_kwargs = dict( ax=ax, x_data=x_data, y_data=y_data, color=color, figsize=figsize, plot_kwargs=plot_kwargs, fill_kwargs=fill_kwargs, backend_kwargs=backend_kwargs, show=show, ) if backend is None: backend = rcParams["plot.backend"] backend = backend.lower() plot = get_plotting_function("plot_hdi", "hdiplot", backend) ax = plot(**hdiplot_kwargs) return ax