Source code for astropy.nddata.covariance

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Implements a class used to store and manipulate covariance matrices.
"""

import warnings

import numpy as np

from astropy import table
from astropy.units import Quantity
from astropy.utils.compat.optional_deps import HAS_SCIPY
from astropy.utils.exceptions import AstropyUserWarning

from .nddata import NDUncertainty

# Required scipy.sparse imports
if HAS_SCIPY:
    from scipy.sparse import coo_matrix, csr_matrix, find, isspmatrix_csr, triu
else:
    err = "Use of 'astropy.nddata.Covariance' requires 'scipy.sparse' module!"

    def find(*args, **kwargs):
        raise ModuleNotFoundError(err)

    def triu(*args, **kwargs):
        raise ModuleNotFoundError(err)

    def csr_matrix(*args, **kwargs):
        raise ModuleNotFoundError(err)

    def coo_matrix(*args, **kwargs):
        raise ModuleNotFoundError(err)

    def isspmatrix_csr(*args, **kwargs):
        raise ModuleNotFoundError(err)


__all__ = ["Covariance"]


# Disabling doctests when scipy isn't present.
__doctest_requires__ = {"*": ["scipy"]}


def _get_csr(arr):
    """
    Helper method used to confirm the array is a `~scipy.sparse.csr_matrix` or
    try to convert it to one.

    Parameters
    ----------
    arr : array-like
        An array that either is a `~scipy.sparse.csr_matrix` or can be converted
        to one.

    Returns
    -------
    `~scipy.sparse.csr_matrix`
        Converted or original matrix.
    """
    # Check the input type
    if isspmatrix_csr(arr):
        return arr
    try:
        return csr_matrix(arr)
    except ValueError:
        raise TypeError(
            "Input matrix is not a scipy.sparse.csr_matrix and could "
            "not be converted to one."
        )


def _impose_sparse_value_threshold(arr, threshold):
    """
    Remove values from a sparse matrix if their absolute value is below the
    provided threshold.

    Parameters
    ----------
    arr : `~scipy.sparse.csr_matrix`
        Array to manipulate.

    threshold : :obj:`float`
        Threshold value

    Returns
    -------
    `~scipy.sparse.csr_matrix`
        Manipulated or original matrix.
    """
    i, j, aij = find(arr)
    index = np.logical_not(np.absolute(aij) < threshold)
    if all(index):
        return arr
    return coo_matrix((aij[index], (i[index], j[index])), shape=arr.shape).tocsr()


def _parse_shape(shape):
    """
    Parse a string representation of an array shape into a tuple.

    Parameters
    ----------
    shape : str
        String representation of the tuple.  It should only contain
        comma-separated integers and parentheses.

    Returns
    -------
    tuple
        Tuple with the shape.
    """
    return tuple([int(n) for n in shape.strip("()").split(",") if len(n) > 0])


[docs] class Covariance(NDUncertainty): r""" A general utility for storing, manipulating, and I/O of covariance matrices. Covariance matrices are symmetric by definition, :math:`\Sigma_{ij} = \Sigma_{ji}`. The object therefore only stores the upper triangle of the matrix using a `scipy.sparse.csr_matrix`. By default, instantiation will check for symmetry and issue a warning if the matrix is not symmetric. This check can be skipped using the ``assume_symmetric`` keyword. However, by virtue of how the data is stored, symmetry is *always imposed* on the matrix. That is, if a non-symmetric matrix is used to instantiate a `Covariance` object, the stored data will yield a matrix that is different from the original input. Covariance matrices of higher dimensional arrays are always assumed to be stored following row-major indexing. For example, the covariance value :math:`\Sigma_{ij}` for an image of size :math:`(N_x,N_y)` is the covariance between image pixels :math:`I_{x_i,y_i}` and :math:`I_{x_j,y_j}`, where :math:`i = x_i + N_x y_i` and, similarly, :math:`j = x_j + N_x y_j`. See :ref:`nddata-covariance` for additional documentation and examples. Parameters ---------- array : array-like, `~scipy.sparse.csr_matrix` Covariance matrix to store. If the array is not a `~scipy.sparse.csr_matrix` instance, it must be convertible to one. To match the calling sequence for `NDUncertainty`, ``array`` has a default value of None, but the array *must* be provided for this `Covariance` object. data_shape : :obj:`tuple`, optional The covariance data is for a higher dimensional array with this shape. For example, if the covariance data is for a 2D image with shape ``(nx,ny)``, set ``data_shape=(nx,ny)``; the shape of the covariance array must then be ``(nx*ny, nx*ny)``. If None, any higher dimensionality is ignored. assume_symmetric : bool, optional Assume the matrix is symmetric. This means that a check for symmetry is not performed, and the user is not warned if the matrix is not symmetric. unit : unit-like, optional Unit for the covariance values. Raises ------ TypeError Raised if the input array not a `~scipy.sparse.csr_matrix` object and cannot be converted to one. ValueError Raised if ``data_shape`` is provided and the input covariance matrix ``array`` does not have the expected shape or if ``array`` is None. """ def __init__(self, array=None, data_shape=None, assume_symmetric=False, unit=None): if array is None: raise ValueError("Covariance object cannot be instantiated with None.") # Ingest the matrix self._cov = triu( Covariance._ingest_matrix(array, assume_symmetric=assume_symmetric) ) # Save the diagonal as a variance array for convenience self._var = self._cov.diagonal() # Set the shape and check it; note self._cov must be defined so that # call to self.shape below is valid. self._data_shape = data_shape if self._data_shape is not None and np.prod(self._data_shape) != self.shape[0]: raise ValueError( "Product of ``data_shape`` must match the covariance axis length." ) # Workspace for index mapping from flattened to original data arrays self._data_index_map = None super().__init__(array=self._cov, copy=False, unit=unit) @staticmethod def _ingest_matrix(arr, assume_symmetric=False): """ Helper method to ingest a covariance or correlation matrix. This function converts the input to a '~scipy.sparse.csr_matrix` using :func:`_get_csr`, and checks that the array is 2D, square, and symmetric. Parameters ---------- arr : array-like An array that either is a `~scipy.sparse.csr_matrix` or can be converted to one. assume_symmetric : bool, optional Assume the matrix is symmetric. This means that a check for symmetry is not performed, and the user is not warned if the matrix is not symmetric. Returns ------- `~scipy.sparse.csr_matrix` Converted or original matrix. """ # Make sure it's a sparse matrix or can be converted to one. _arr = _get_csr(arr) # Check that it's 2D if _arr.ndim != 2: raise ValueError("Covariance arrays must be 2-dimensional.") # Check that it's square if _arr.shape[0] != _arr.shape[1]: raise ValueError("Covariance matrices must be square.") # Skip the symmetry check, if requested if assume_symmetric: return _arr # Check that it's symmetric flip_diff = _arr - _arr.T if not np.allclose(flip_diff.data, np.zeros_like(flip_diff.data)): warnings.warn( "Asymmetry detected in covariance/correlation matrix. Matrix will be modified " "to be symmetric using its upper triangle.", AstropyUserWarning, ) _arr = triu(_arr) + triu(_arr, 1).T return _arr @property def shape(self): """Tuple with the shape of the covariance matrix""" return self._cov.shape @property def nnz(self): """ The number of non-zero (NNZ) elements in the full covariance matrix, *including* both the upper and lower triangles. """ return self.stored_nnz * 2 - self._cov.shape[0] @property def stored_nnz(self): """ The number of non-zero elements stored by the object, which only counts the non-zero elements in the upper triangle. """ return self._cov.nnz @property def variance(self): """ The diagonal of the covariance matrix. """ return self._var @variance.setter def variance(self, value): raise NotImplementedError( "Directly setting variance values is not allowed for Covariance objects." ) @property def uncertainty_type(self): """``"cov"``: `Covariance` implements a covariance matrix.""" return "cov" @property def quantity(self): """ The covariance matrix as an dense `~astropy.units.Quantity` object. """ return Quantity(self.to_dense(), self.unit, copy=False, dtype=self._cov.dtype) def _data_unit_to_uncertainty_unit(self, value): """ Return the uncertainty unit for covariances given the data unit. """ return value**2 def __repr__(self): return f"<{self.__class__.__name__}; shape = {self.shape}>" # Skip error propagation for now def _propagate_add(self, other_uncert, result_data, correlation): return None def _propagate_subtract(self, other_uncert, result_data, correlation): return None def _propagate_multiply(self, other_uncert, result_data, correlation): return None def _propagate_divide(self, other_uncert, result_data, correlation): return None
[docs] @classmethod def from_samples(cls, samples, cov_tol=None, rho_tol=None, **kwargs): r""" Build a covariance object using discrete samples. The covariance is generated using `~numpy.cov` for a set of discretely sampled data for an :math:`N`-dimensional parameter space. Parameters ---------- samples : `~numpy.ndarray` Array with samples drawn from an :math:`N`-dimensional parameter space. The shape of the input array must be :math:`N_{\rm par}\times N_{\rm samples}`. cov_tol : :obj:`float`, optional The absolute value of any *covariance matrix* entry less than this is assumed to be equivalent to (and set to) 0. rho_tol : :obj:`float`, optional The absolute value of any *correlation coefficient* less than this is assumed to be equivalent to (and set to) 0. **kwargs : dict, optional Passed directly to main instantiation method. Returns ------- `Covariance` An :math:`N_{\rm par}\times N_{\rm par}` covariance matrix built using the provided samples. Raises ------ ValueError Raised if the input array is not 2D or if the number of samples (length of the second axis) is less than 2. """ if samples.ndim != 2: raise ValueError("Input samples for covariance matrix must be a 2D array!") if samples.shape[1] < 2: raise ValueError("Fewer than two samples provided!") return Covariance.from_array( np.cov(samples), cov_tol=cov_tol, rho_tol=rho_tol, **kwargs )
[docs] @classmethod def from_array(cls, covar, cov_tol=None, rho_tol=None, **kwargs): r""" Define a covariance object from an array. .. note:: The only difference between this method and the direct instantiation method (i.e., ``Covariance(array=covar)``) is that it can be used to impose tolerances on the covariance value and/or correlation coefficients. Parameters ---------- covar : array-like Array with the covariance data. The object must be either a `~scipy.sparse.csr_matrix` or an object that can be converted to one. It must also be 2-dimensional and square. cov_tol : :obj:`float`, optional The absolute value of any *covariance* matrix entry less than this is assumed to be equivalent to (and set to) 0. rho_tol : :obj:`float`, optional The absolute value of any *correlation coefficient* less than this is assumed to be equivalent to (and set to) 0. **kwargs : dict, optional Passed directly to main instantiation method. Returns ------- `Covariance` The covariance matrix built using the provided array. """ # Get the assume_symmetric flag, either from kwargs or as the default # value assume_symmetric = kwargs.pop("assume_symmetric", False) # Convert the covariance to a correlation matrix. If rho_tol is None, # this just serves to symmetrize the matrix if it's not already. Set # assume_symmetric to True hereafter var, rho = Covariance.to_correlation(covar, assume_symmetric=assume_symmetric) if rho_tol is not None: rho = _impose_sparse_value_threshold(rho, rho_tol) _covar = Covariance.revert_correlation(var, rho, assume_symmetric=True) if cov_tol is not None: _covar = _impose_sparse_value_threshold(_covar, cov_tol) return cls(array=_covar, assume_symmetric=True, **kwargs)
[docs] @classmethod def from_table(cls, triu_covar): r""" Construct the covariance matrix from a table with the non-zero elements of the upper triangle of the covariance matrix in coordinate format. This is the inverse operation of :func:`to_table`. The class can read covariance data written by other programs *as long as they have a commensurate format*; see :func:`to_table`. Parameters ---------- triu_covar : `~astropy.table.Table` The non-zero elements of the upper triangle of the covariance matrix in coordinate format; see :func:`to_table`. Returns ------- `Covariance` The covariance matrix constructed from the tabulated data. Raises ------ ValueError Raised if ``triu_covar.meta`` is ``None``, if the provided variance array does not have the correct size, or if the data is multidimensional and the table columns do not have the right shape. """ # Read shapes if "COVSHAPE" not in triu_covar.meta: raise ValueError("Table meta dictionary *must* contain COVSHAPE") shape = _parse_shape(triu_covar.meta["COVSHAPE"]) data_shape = ( _parse_shape(triu_covar.meta["COVDSHP"]) if "COVDSHP" in triu_covar.meta else None ) # Number of non-zero elements nnz = len(triu_covar) # Read coordinate data # WARNING: If the data is written correctly, it should always be true that i<=j if data_shape is None: i = triu_covar["INDXI"].data j = triu_covar["INDXJ"].data else: ndim = triu_covar["INDXI"].shape[1] if len(data_shape) != ndim: raise ValueError("Mismatch between COVDSHP keyword and tabulated data.") i = np.ravel_multi_index(triu_covar["INDXI"].data.T, data_shape) j = np.ravel_multi_index(triu_covar["INDXJ"].data.T, data_shape) # Units unit = triu_covar.meta.get("BUNIT", None) # Set covariance data cij = triu_covar["COVARIJ"].data # NOTE: the astype conversion of cij when instantiating the matrix below # is because of how scipy.sparse restricts instantiation of sparse # arrays. It doesn't like big-endian byte order. To reproduce the # underlying error: # >>> import numpy as np # >>> from scipy.sparse._sputils import getdtype # >>> getdtype(np.dtype('>f8')) # Traceback (most recent call last): # File "<stdin>", line 1, in <module> # File "/Users/westfall/.virtualenvs/astropy/lib/python3.12/site-packages/scipy/sparse/_sputils.py", line 137, in getdtype # raise ValueError(f"scipy.sparse does not support dtype {newdtype.name}. " # ValueError: scipy.sparse does not support dtype float64. The only supported types are: bool, int8, uint8, int16, uint16, int32, uint32, int64, uint64, longlong, ulonglong, float32, float64, longdouble, complex64, complex128, clongdouble. # >>> getdtype(np.dtype('<f8')) # dtype('float64') cov = coo_matrix((cij.astype(cij.dtype.type), (i, j)), shape=shape).tocsr() # Instantiate. Set assume_symmetric to true to avoid the warning from # the _ingest_matrix method return cls(array=cov, data_shape=data_shape, unit=unit, assume_symmetric=True)
[docs] @classmethod def from_matrix_multiplication(cls, T, covar, **kwargs): r""" Construct the covariance matrix that results from a matrix multiplication. Linear operations on a dataset (e.g., binning or smoothing) can be written as matrix multiplications of the form .. math:: {\mathbf y} = {\mathbf T}\ {\mathbf x}, where :math:`{\mathbf T}` is a transfer matrix of size :math:`N_y\times N_x`, :math:`{\mathbf x}` is a vector of size :math:`N_x`, and :math:`{\mathbf y}` is a vector of length :math:`{N_y}` that results from the multiplication. If :math:`{\mathbf \Sigma}_x` is the covariance matrix for :math:`{\mathbf x}`, then the covariance matrix for :math:`{\mathbf Y}` is .. math:: {\mathbf \Sigma}_y = {\mathbf T}\ {\mathbf \Sigma}_x\ {\mathbf T}^\top. If ``covar`` is provided as a vector of length :math:`N_x`, it is assumed that the elements of :math:`{\mathbf X}` are independent and the provided vector gives the *variance* in each element; i.e., the provided data represent the diagonal of :math:`{\mathbf \Sigma}`. Parameters ---------- T : `~scipy.sparse.csr_matrix`, `~numpy.ndarray` Transfer matrix. See above. covar : `~scipy.sparse.csr_matrix`, `~numpy.ndarray` Covariance matrix. See above. **kwargs : dict, optional Passed directly to main instantiation method. Returns ------- `Covariance` The covariance matrix resulting from the matrix multiplication. Raises ------ ValueError Raised if the provided arrays are not two dimensional or if there is a shape mismatch. """ if T.ndim != 2: raise ValueError("Input transfer matrix must be two-dimensional.") nx = T.shape[1] if covar.shape != (nx, nx) and covar.shape != (nx,): raise ValueError( f"Shape of input variance matrix must be either ({nx}, {nx}) or ({nx},)." ) # If it isn't already, convert T to a csr_matrix _T = T if isinstance(T, csr_matrix) else csr_matrix(T) # Set the covariance matrix in X _covar = ( coo_matrix((covar, (np.arange(nx), np.arange(nx))), shape=(nx, nx)).tocsr() if covar.ndim == 1 else (covar if isinstance(covar, csr_matrix) else csr_matrix(covar)) ) # Construct the covariance matrix return cls(_T.dot(_covar.dot(_T.transpose())).tocsr(), **kwargs)
[docs] @classmethod def from_variance(cls, variance, **kwargs): """ Construct a diagonal covariance matrix using the provided variance. Parameters ---------- variance : `~numpy.ndarray` The variance vector. **kwargs : dict, optional Passed directly to main instantiation method. Returns ------- `Covariance` The diagonal covariance matrix. """ return cls(csr_matrix(np.diagflat(variance)), **kwargs)
[docs] def to_sparse(self, correlation=False): """ Return the full covariance matrix as a `~scipy.sparse.csr_matrix` object. This method is essentially equivalent to `to_dense` except that it returns a sparse array. Parameters ---------- correlation : :obj:`bool`, optional Return the *correlation* matrix. If False, return the covariance matrix. Returns ------- `~scipy.sparse.csr_matrix` The sparse matrix with both the upper and lower triangles filled (with symmetric information). """ cov = triu(self._cov) + triu(self._cov, 1).T if not correlation: return cov return Covariance.to_correlation(cov, assume_symmetric=True)[1]
[docs] def apply_new_variance(self, var): """ Using the same correlation coefficients, return a new `Covariance` object with the provided variance. Parameters ---------- var : array-like Variance vector. Must have a length that matches this `Covariance` instance; e.g., if this instance is ``cov``, the length of ``var`` must be ``cov.shape[0]``). Note that, if the covariance is for higher dimensional data, this variance array *must* be flattened to 1D. Returns ------- `Covariance` A covariance matrix with the same shape and correlation coefficients and this object, but with the provided variance. Raises ------ ValueError Raised if the length of the variance vector is incorrect. """ _var = np.asarray(var) if _var.shape != self._var.shape: raise ValueError( f"Provided variance has incorrect shape. Expected {self._var.shape}, " f"found {_var.shape}." ) i, j, cij = find(self._cov) _cov = coo_matrix( (cij * np.sqrt(_var[i] / self._var[i] * _var[j] / self._var[j]), (i, j)), shape=self.shape, ).tocsr() return Covariance( array=_cov, data_shape=self._data_shape, assume_symmetric=True )
[docs] def copy(self): """ Return a copy of this Covariance object. Returns ------- `Covariance` A copy of the current covariance matrix. """ # Create the new Covariance instance with a copy of the data return Covariance( array=self._cov.copy(), data_shape=self._data_shape, assume_symmetric=True, unit=self.unit, )
[docs] def to_dense(self, correlation=False): """ Return the full covariance matrix as a `numpy.ndarray` object (a "dense" array). Parameters ---------- correlation : bool, optional Flag to return the correlation matrix, instead of the covariance matrix. Note that setting this to ``True`` does *not* also return the variance vector. Returns ------- `~numpy.ndarray` Dense array with the full covariance matrix. """ return self.to_sparse(correlation=correlation).toarray()
[docs] def find(self, correlation=False): """ Find the non-zero values in the **full** covariance matrix (not just the upper triangle). This is a simple wrapper for `to_sparse` and `~scipy.sparse.find`. Parameters ---------- correlation : bool, optional Flag to return the correlation data, instead of the covariance data. Note that setting this to ``True`` does *not* also return the variance vector. Returns ------- i, j : `numpy.ndarray` Arrays containing the index coordinates of the non-zero values in the covariance (or correlation) matrix. c : `numpy.ndarray` The non-zero covariance (or correlation) matrix values located at the provided ``i,j`` coordinates. """ return find(self.to_sparse(correlation=correlation))
[docs] def covariance_to_data_indices(self, i, j): r""" Given indices along the two axes of the covariance matrix, return the relevant indices in the data array. This is the inverse of :func:`data_to_covariance_indices`. Parameters ---------- i : `~numpy.ndarray` 1D array with the index along the first axis of the covariance matrix. Must be in the range :math:`0...n-1`, where :math:`n` is the length of the covariance-matrix axes. j : `~numpy.ndarray` 1D array with the index along the second axis of the covariance matrix. Must be in the range :math:`0...n-1`, where :math:`n` is the length of the covariance-matrix axes. Returns ------- i_data, i_data : tuple, `numpy.ndarray` If `data_shape` is not defined, the input arrays are simply returned (and not copied). Otherwise, the code uses `~numpy.unravel_index` to calculate the relevant data-array indices; each element in the two-tuple is itself a tuple of :math:`N_{\rm dim}` arrays, one array per dimension of the data array. Raises ------ ValueError Raised if the provided indices fall outside the range of covariance matrix. """ if self._data_shape is None: if np.any( (i < 0) | (i > self.shape[0] - 1) | (j < 0) | (j > self.shape[1] - 1) ): raise ValueError( "Some indices not valid for covariance matrix with shape " f"{self.shape}." ) return i, j return np.unravel_index( np.atleast_1d(i).ravel(), self._data_shape ), np.unravel_index(np.atleast_1d(j).ravel(), self._data_shape)
[docs] def data_to_covariance_indices(self, i, j): r""" Given indices of elements in the source data array, return the matrix coordinates with the associated covariance. This is the inverse of :func:`covariance_to_data_indices`. Parameters ---------- i : array-like, tuple A tuple of :math:`N_{\rm dim}` array-like objects providing the indices of elements in the N-dimensional data array. This can be an array-like object if ``data_shape`` is undefined, in which case the values must be in the range :math:`0...n-1`, where :math:`n` is the length of the data array. j : array-like, tuple The same as ``i``, but providing a second set of coordinates at which to access the covariance. Returns ------- i_covar, j_covar : `numpy.ndarray` Arrays providing the indices in the covariance matrix associated with the provided data array coordinates. If ``data_shape`` is not defined, the input arrays are simply returned (and not copied). Otherwise, the code uses `~numpy.ravel_multi_index` to calculate the relevant covariance indices. Raises ------ ValueError Raised if the provided indices fall outside the range of data array, or if the length of the ``i`` or ``j`` tuples is not :math:`N_{\rm dim}`. """ if self._data_shape is None: if np.any( (i < 0) | (i > self.shape[0] - 1) | (j < 0) | (j > self.shape[1] - 1) ): raise ValueError( "Some indices not valid for covariance matrix with shape " f"{self.shape}." ) return i, j if len(i) != len(self.data_shape): raise ValueError( "Length of input coordinate list (i) is incorrect; expected " f"{len(self.data_shape)}, found {len(i)}" ) if len(j) != len(self.data_shape): raise ValueError( "Length of input coordinate list (j) is incorrect; expected " f"{len(self.data_shape)}, found {len(i)}" ) return np.ravel_multi_index(i, self.data_shape), np.ravel_multi_index( j, self.data_shape )
[docs] def coordinate_data(self, reshape=False): r""" Construct data arrays with the non-zero covariance components in coordinate format. Coordinate format means that the covariance matrix data is provided in three columns providing :math:`\Sigma_{ij}` and the (0-indexed) matrix coordinates :math:`i,j`. This procedure is primarily used when constructing the data arrays for storage. Matching the class convention, the returned data only includes the upper triangle. Parameters ---------- reshape : :obj:`bool`, optional If ``reshape`` is ``True`` and `data_shape` is defined, the :math:`i,j` indices are converted to the expected data-array indices; see :func:`covariance_to_data_indices`. These can be reverted to the coordinates in the covariance matrix using :func:`data_to_covariance_indices`. Returns ------- i, j : tuple, `numpy.ndarray` The row and column indices, :math:`i,j`: of the covariance matrix. If reshaping, these are tuples with the index arrays along each of the reshaped axes. cij : `numpy.ndarray` The covariance, :math:`\Sigma_{ij}`, between array elements at indices :math:`i` and :math:`j`. Raises ------ ValueError Raised if ``reshape`` is True but `data_shape` is undefined. """ if reshape and self._data_shape is None: raise ValueError( "If reshaping, the shape of the data before flattening to the " "covariance array (``data_shape``) must be defined." ) # Get the data (only stores the upper triangle!) i, j, cij = find(self._cov) # Return the data. if reshape: # Reshape the indices and the variance array. return ( np.unravel_index(i, self._data_shape), np.unravel_index(j, self._data_shape), cij, ) return i, j, cij
[docs] def to_table(self): r""" Return the covariance data in a `~astropy.table.Table` using coordinate format. Coordinate format means that the covariance matrix data is provided in three columns providing :math:`\Sigma_{ij}` and the (0-indexed) matrix coordinates :math:`i,j`. The output table has three columns: - ``'INDXI'``: The row index in the covariance matrix. - ``'INDXJ'``: The column index in the covariance matrix. - ``'COVARIJ'``: The covariance at the relevant :math:`i,j` coordinate. The table also contains the following metadata: - ``'COVSHAPE'``: The shape of the covariance matrix - ``'BUNIT'``: (If ``unit`` is defined) The string representation of the covariance units. - ``'COVDSHP'``: (If ``data_shape`` is defined) The shape of the associated data array. If ``data_shape`` is set, the covariance matrix indices are reformatted to match the coordinates in the N-dimensional array. .. warning:: Recall that the storage of covariance matrices for higher dimensional data always assumes a row-major storage order. Objects instantiated by this method can be used to re-instantiate the `Covariance` object using `from_table`. Returns ------- `~astropy.table.Table` Table with the covoariance matrix in coordinate format and the relevant metadata. """ meta = {} meta["COVSHAPE"] = str(self.shape) if self.unit is not None: meta["BUNIT"] = self.unit.to_string() reshape = self._data_shape is not None i, j, cij = self.coordinate_data(reshape=reshape) triu_nnz = cij.size if reshape: meta["COVDSHP"] = str(self._data_shape) i = np.column_stack(i) j = np.column_stack(j) coo_shape = (i.shape[1],) else: coo_shape = None return table.Table( [ table.Column( data=i, name="INDXI", dtype=int, length=triu_nnz, shape=coo_shape ), table.Column( data=j, name="INDXJ", dtype=int, length=triu_nnz, shape=coo_shape ), table.Column(data=cij, name="COVARIJ", dtype=float, length=triu_nnz), ], meta=meta, )
@property def data_shape(self): """ The expected shape of the data array associated with this covariance array. """ return (self.shape[0],) if self._data_shape is None else self._data_shape @property def data_index_map(self): """ An array mapping the index along each axis of the covariance matrix to the shape of the associated data array. """ if self._data_index_map is None: self._data_index_map = np.arange(self.shape[0]) if self._data_shape is not None: self._data_index_map = self._data_index_map.reshape(self._data_shape) return self._data_index_map
[docs] def match_to_data_slice(self, data_slice): """ Return a new `Covariance` instance that is matched to a slice of its parent data array. Parameters ---------- data_slice : slice, array-like Anything that can be used to slice a `numpy.ndarray`. To generate a slice using syntax that mimics accessing numpy array elements, use `numpy.s_`; see examples :ref:`here<covariance-match-to-data-slice>`. Returns ------- `Covariance` A new covariance object for the sliced data array. """ remap = self.data_index_map[data_slice] index = remap.ravel() return Covariance( self.to_sparse()[np.ix_(index, index)], data_shape=None if len(remap.shape) == 1 else remap.shape, )
[docs] @staticmethod def to_correlation(cov, assume_symmetric=False): r""" Convert a covariance matrix into a correlation matrix by dividing each element by the variances. Specifically, extract ``var`` (:math:`V_i = C_{ii} \equiv \sigma^2_i`) and convert ``cov`` from a covariance matrix with elements :math:`C_{ij}` to a correlation matrix with :math:`\rho_{ij}` such that .. math:: C_{ij} \equiv \rho_{ij} \sigma_i \sigma_j. To revert a variance vector and correlation matrix back to a covariance matrix, use :func:`revert_correlation`. Parameters ---------- cov : array-like Covariance matrix to convert. Must be a `~scipy.sparse.csr_matrix` instance or convertible to one. assume_symmetric : bool, optional Assume the matrix is symmetric. This means that a check for symmetry is not performed, and the user is not warned if the matrix is not symmetric. Returns ------- var : `numpy.ndarray` Variance vector rho : `~scipy.sparse.csr_matrix` Correlation matrix Raises ------ ValueError Raised if the input array is not 2D and square. """ # Ingest the matrix _cov = Covariance._ingest_matrix(cov, assume_symmetric=assume_symmetric) # Save the diagonal var = _cov.diagonal() # Find all the non-zero elements i, j, cij = find(_cov) rho = coo_matrix( (cij / np.sqrt(var[i] * var[j]), (i, j)), shape=_cov.shape ).tocsr() return var, rho
[docs] @staticmethod def revert_correlation(var, rho, assume_symmetric=False): r""" Revert a variance vector and correlation matrix into a covariance matrix. This is the reverse operation of `to_correlation`. Parameters ---------- var : `~numpy.ndarray` Variance vector. Length must match the diagonal of ``rho``. rho : `~numpy.ndarray`, `~scipy.sparse.csr_matrix` Correlation matrix. Diagonal must have the same length as ``var``. assume_symmetric : bool, optional Assume the matrix is symmetric. This means that a check for symmetry is not performed, and the user is not warned if the matrix is not symmetric. Returns ------- `~scipy.sparse.csr_matrix` Covariance matrix. """ i, j, rhoij = find( Covariance._ingest_matrix(rho, assume_symmetric=assume_symmetric) ) return coo_matrix( (rhoij * np.sqrt(var[i] * var[j]), (i, j)), shape=rho.shape ).tocsr()