Source code for mangadap.util.vander

"""
Implements a base class that performs linear least-squares fitting of
1D basis functions constructed via a pseudo-Vandermonde matrix.

----

.. include license and copyright
.. include:: ../include/copy.rst

----

.. include common links, assuming primary doc root is up one directory
.. include:: ../include/links.rst
"""
import os
import time

from IPython import embed

import numpy

from . import modeling

# TODO: Generalize this even further so that a user can provide their
# own "basis" functions. And change the name of this module to
# basis.py?

[docs]class Vander1D:
    r"""
    Base class for least-squares fitting using linear combinations of 1D
    basis functions.

    Args:
        vander (callable):
            Function used to generate the pseudo-Vandermonde matrix
            of the basis functions; e.g.,
            `numpy.polynomial.legendre.legvander`_. Calling sequence
            must be ``vander(x,order)``.
        x (array-like):
            Ordinate for fitting.
        order (:obj:`int`):
            Order of the fit.
        rcond (:obj:`float`, optional):
            Relative condition number of the fit. Singular values
            smaller than this relative to the largest singular value
            will be ignored. The default value is len(x)*eps, where eps
            is the relative precision of the float type, about 2e-16 in
            most cases.

    Attributes:
        x (`numpy.ndarray`_):
            See the instantiation argument.  Shape is :math:`(N_{\rm
            x},)`.
        v (`numpy.ndarray`_):
            Pseudo-Vandermonde matrix of the request fitting order,
            :math:`o`.  Shape is :math:`(o+1, N_{\rm x})`.
        rcond (:obj:`float`):
            See the instantiation argument.
    """

    def __init__(self, vander, x, order, rcond=None):
        self.x = numpy.atleast_1d(x)
        if self.x.ndim != 1:
            raise ValueError('Ordinate must be a vector.')

        self.v = vander(self.x, order).T
        self.rcond = self.x.size * numpy.finfo(self.x.dtype).eps if rcond is None else rcond

    @property
    def size(self):
        """The size of the coordinate vector."""
        return self.x.size

    @property
    def shape(self):
        """The shape of the coordinate vector."""
        return self.x.shape

    @property
    def order(self):
        """The order of the function."""
        return self.v.shape[0]-1

[docs]    def fit(self, y, w=None, err=None, mask=None, rej_iter=0, rej_lo=3., rej_hi=3., rej_win=None,
            model=False):
        r"""
        Fit the provided data.

        The ordinate is provided on the class instantiation.

        If provided, the errors, :math:`\epsilon`, are converted to
        weights to use during the fitting, where :math:`\omega_i =
        1/\epsilon_i`. Both weights, :math:`w`, and errors can be
        provided; however, in this case, the weights used during the
        fit are are :math:`\omega_i = w_i / \epsilon_i`.

        .. todo::
            
            Allow for the nominal calculation of the errors in the
            coefficients.

        Args:
            y (array-like):
                A 1D or 2D array with the 1D functions to fit. Shape
                is :math:`(N_x,)` or :math:`(N_{\rm vec},N_x)`, where
                :math:`N_x` is the length of the ordinate
                (:attr:`x`). Input can be a masked array.
            w (array-like, optional):
                A 1D or 2D array with the weights for each pixel.
                Shape is :math:`(N_x,)` or :math:`(N_{\rm vec},N_x)`,
                where :math:`N_x` is the length of the ordinate
                (:attr:`x`). If ``w`` is 1D, but ``y`` is 2D, the
                same weights are applied to all vectors.
            err (array-like, optional):
                Error in ``y``, used for weighting. Shape must match
                ``y``.
            mask (array-like, optional):
                A boolean mask; values to ignore should have ``mask
                == True``. Shape must match ``y``. Treated
                identically as a mask provided by passing in a
                `numpy.ma.MaskedArray`_ for ``y``. Note that
                providing a weight of 0 is identical to masking the
                value.
            rej_iter (:obj:`int`, optional):
                Number of fit rejection iterations.  If 0, no rejection
                iterations are performed.  If -1, rejections are
                performed until no points are rejected in a given
                iteration.
            rej_lo (scalar-like, optional):
                The lower sigma rejection threshold.
            rej_hi (scalar-like, optional):
                The higher sigma rejection threshold.
            rej_win (:obj:`int`, optional):
                The length of a boxcar window to use in a calculation of
                a *local* standard deviation used during the rejection
                iterations.  This is useful if the S/N varies strongly
                within the provided vector being fit.  If None, no
                windowing is applied, meaning a *global* standard
                deviation is used for rejection.
            model (:obj:`bool`, optional):
                If True, return the model as well as the best fitting
                coefficients.

        Returns:
            `numpy.ndarray`, :obj:`tuple`: As many as three objects
            can be returned. Regardless of the input, the function
            always returns an array with the best-fitting
            coefficients, with shape :math:`(o+1,)` or :math:`(N_{\rm
            vec},o+1)`, depending on the input shape of ``y``. If
            ``model`` is True, an array with the best-fitting models
            are also returned (same shape as ``y``). If rejection
            operations are performed, a boolean array flagging the
            pixels excluded from the fit is also returned. Note that
            any data that will have been ignored based on the input
            (the element is masked or has 0 weight) is *not* flagged
            as having been rejected.
        """
        # Check the input vectors to fit
        _y = numpy.atleast_1d(y)
        if _y.ndim != 1 and _y.ndim != 2:
            raise ValueError('Input y coordinates must be a single vector or 2D array.')
        if _y.shape[-1] != self.x.size:
            raise ValueError('The last dimension of y must have the same length as the ordinate.')

        # Setup the weights
        if w is None:
            # Weights are needed if rejecting
            _w = None if rej_iter == 0 else numpy.ones(_y.shape, dtype=float) 
        else:
            _w = numpy.atleast_1d(w)
            if _w.ndim != 1 and _w.ndim != 2:
                raise ValueError('Input weights must be a single vector or 2D array.')
            if _w.ndim == 1 and _w.shape != self.x.shape:
                raise ValueError('If a single vector, the weights must have the same shape as '
                                 'the ordinate vector.')
            if rej_iter != 0 and _w.ndim != _y.ndim:
                # If rejecting, weights must be applied to each vector
                # separately
                _w = numpy.tile(_w, (_y.shape[0],1))
            if _w.ndim == 2 and _w.shape != _y.shape:
                raise ValueError('If an array, the weights must have the same shape as the data '
                                 'to fit.')
            if numpy.any(_w < 0):
                raise ValueError('Weights must be positive!')
        if isinstance(_y, numpy.ma.MaskedArray):
            _m = numpy.logical_not(numpy.ma.getmaskarray(_y)).astype(float)
            _w = _m if _w is None else _m * _w
            _y = _y.filled(0.0)
        if mask is not None:
            _m = numpy.logical_not(numpy.atleast_1d(mask)).astype(float)
            if _m.shape != _y.shape:
                raise ValueError('Mask must have the same shape as the data to fit.')
            _w = _m if _w is None else _m * _w

        # Check the input error and add it to the weights, if provided
        if err is not None:
            # Below is faster than numpy.ma.power(err, -1).filled(0.0)
            # and avoids errors when err is 0.
            _e = err.filled(0.0) if isinstance(err, numpy.ma.MaskedArray) else err
            _e = (_e > 0)/(_e + (_e == 0.))
            if _e.shape != _y.shape:
                raise ValueError('Input errors must have the same shape as the data to fit.')
            _w = _e if _w is None else _e * _w

        # Fit the data
        c = self._single_fit(_y, _w)

        # If no rejection iterations, we're done
        if rej_iter == 0:
            return (c, self.model(c)) if model else c

        # Initial mask
        init_good = _w > 0

        # Iteratively reject
        i = 0
        while i < rej_iter or rej_iter < 0:
            # Setup a masked array to hold the weighted residuals
            resid = _w * numpy.ma.MaskedArray(_y - self.model(c), mask=numpy.logical_not(_w > 0))
            # Reject pixels
            rej = modeling.reject_residuals_1d(resid, lo=rej_lo, hi=rej_hi, boxcar=rej_win)
            # Determine if any were rejected
            refit = numpy.any(rej, axis=-1)
            if not numpy.any(refit):
                # None were rejected so we're done
                break
            # Set the weight to 0 for the rejected data
            _w *= numpy.logical_not(rej).astype(float)
            # Refit
            c = self._single_fit(_y, _w)
            # Increment iteration
            i += 1

        rejected = numpy.logical_not(_w > 0) & init_good
        return (c, self.model(c), rejected) if model else (c, rejected)

[docs]    def model(self, c):
        r"""
        Return the model function constructed using the provided
        coefficients.

        Args:
            c (array-like):
                The coefficients for the model with shape shape
                :math:`(o+1,)` or :math:`(N_{\rm vec},o+1)`, where
                :math:`o` is the order of the function.

        Returns:
            `numpy.ndarray`_: Array with the evaluated functions. The
            output shape is :math:`(N_x,)` or :math:`(N_{\rm
            vec},N_x)` depending on the input shape of ``c``.
        """
        return numpy.dot(c, self.v)

[docs]    def _single_fit(self, y, w):
        r"""
        Perform a single fitting iteration.

        Args:
            y (`numpy.ndarray`_):
                The data to fit.  Shape is :math:`(N_{\rm x},)` or
                :math:`(N_{\rm vec}, N_{\rm x})`.  If the latter, a fit
                is done to each vector.
            w (`numpy.ndarray`_):
                Weights.  Shape is :math:`(N_{\rm x},)` or
                :math:`(N_{\rm vec}, N_{\rm x})`.  If the latter, this
                call is equivalent to looping over :math:`N_{\rm vec}`
                and fitting each vector in ``y`` individually.  Can be
                None, and if so the fit is unweighted.
        
        Returns:
            `numpy.ndarray`_: The best-fitting coefficients for the
            basis functions with order :math:`o`.  Shape is
            :math:`(o+1,)` or :math:`(N_{\rm vec},o+1)`.
        """
        if w is None:
            # Unweighted fit. For multiple y vectors, make sure
            # returned array has the shape described by the return
            # statement above.
            return numpy.linalg.lstsq(self.v.T, y.T, rcond=self.rcond)[0].T
        
        if y.ndim > 1 and w.shape == y.shape:
            # If weighting, can only solve all fits simultaneously if
            # the same weights are used for all vectors. Therefore,
            # need to loop through all y vectors here to accommodate
            # weights that may be different fro each y vector.
            c = numpy.zeros((y.shape[0], self.v.shape[0]), dtype=float)
            for i in range(y.shape[0]):
                c[i] = self._single_fit(y[i], w[i])
            return c

        # w is a single vector, used when fitting one or more y
        # vectors.
        lhs = self.v * w
        rhs = y * w
        scl = numpy.sqrt(numpy.square(lhs).sum(axis=1))
        scl[scl == 0] == 1
        c = numpy.linalg.lstsq(lhs.T/scl, rhs.T, rcond=self.rcond)[0].T
        return c/scl


[docs]class Legendre1D(Vander1D):
    """
    Least-squares fitting of 1D Legendre functions.

    Args:
        x (array-like):
            Ordinate for fitting.
        order (:obj:`int`):
            Order of the fit.
        rcond (:obj:`float`, optional):
            Relative condition number of the fit. Singular values
            smaller than this relative to the largest singular value
            will be ignored. The default value is len(x)*eps, where eps
            is the relative precision of the float type, about 2e-16 in
            most cases.
    """
    def __init__(self, x, order, rcond=None):
        super(Legendre1D, self).__init__(numpy.polynomial.legendre.legvander, x, order,
                                         rcond=rcond)