Skip to content

MI

sgptools.objectives.MI

Bases: Objective

Computes the Mutual Information (MI) between a fixed set of objective points (X_objective) and a variable set of input points (X).

MI is calculated as: \(MI(X; X_{objective}) = \log|K(X,X)| + \log|K(X_{objective},X_{objective})| - \log|K(X \cup X_{objective}, X \cup X_{objective})|\)

Jitter is added to the diagonal of the covariance matrices to ensure numerical stability.

Source code in sgptools/objectives.py
class MI(Objective):
    r"""
    Computes the Mutual Information (MI) between a fixed set of objective points
    (`X_objective`) and a variable set of input points (`X`).

    MI is calculated as:
    $MI(X; X_{objective}) = \log|K(X,X)| + \log|K(X_{objective},X_{objective})| - \log|K(X \cup X_{objective}, X \cup X_{objective})|$

    Jitter (plus the noise variance) is added to the diagonal of the covariance
    matrices to ensure numerical stability, and the log-determinants are computed
    directly in log-space so that very small or very large determinants do not
    underflow/overflow before the logarithm is taken.
    """

    def __init__(self,
                 X_objective: np.ndarray,
                 kernel: gpflow.kernels.Kernel,
                 noise_variance: float,
                 jitter: float = 1e-6,
                 **kwargs: Any):
        """
        Initializes the Mutual Information (MI) objective.

        Args:
            X_objective (np.ndarray): The fixed set of data points (e.g., candidate locations
                                      or training data points) against which MI is computed.
                                      Shape: (N, D).
            kernel (gpflow.kernels.Kernel): The GPflow kernel function to compute covariances.
            noise_variance (float): The observed data noise variance, which is added to the jitter.
            jitter (float): A small positive value to add for numerical stability to covariance
                            matrix diagonals. Defaults to 1e-6.
            **kwargs: Arbitrary keyword arguments. They are accepted but not used here.
        """
        # Stored as a float64 constant; `X` passed to `__call__` is concatenated
        # with it, so it is expected to use the same dtype.
        self.X_objective = tf.constant(X_objective, dtype=tf.float64)
        self.kernel = kernel
        self.noise_variance = noise_variance
        # Total jitter includes the noise variance. The lambda reads both
        # attributes at call time, so it always uses their current values.
        self._base_jitter = jitter
        self.jitter_fn = lambda cov: jitter_fn(
            cov, jitter=self._base_jitter + self.noise_variance)

    def __call__(self, X: tf.Tensor) -> tf.Tensor:
        """
        Computes the Mutual Information for the given input points `X`.

        Args:
            X (tf.Tensor): The input points (e.g., sensing locations) for which
                           MI is to be computed. Shape: (M, D).

        Returns:
            tf.Tensor: The computed Mutual Information value.

        Usage:
            ```python
            import gpflow
            import numpy as np
            import tensorflow as tf
            # Assume X_objective and kernel are defined
            # X_objective = np.random.rand(100, 2)
            # kernel = gpflow.kernels.SquaredExponential()
            # noise_variance = 0.1

            mi_objective = MI(X_objective=X_objective, kernel=kernel, noise_variance=noise_variance)
            X_sensing = tf.constant(np.random.rand(10, 2), dtype=tf.float64)
            mi_value = mi_objective(X_sensing)
            ```
        """
        # K(X_objective, X_objective)
        K_obj_obj = self.jitter_fn(self.kernel(self.X_objective))
        # K(X, X)
        K_X_X = self.jitter_fn(self.kernel(X))
        # K(X_objective U X, X_objective U X)
        K_combined = self.jitter_fn(
            self.kernel(tf.concat([self.X_objective, X], axis=0)))

        # Compute the log-determinants directly in log-space.
        # `tf.linalg.slogdet` returns (sign, log|det|); taking log(det(.))
        # instead would produce -inf/inf/NaN once the determinant itself
        # underflows or overflows, which happens quickly as matrices grow.
        # The sign is discarded: for a positive determinant log|det| == log(det).
        _, logdet_K_obj_obj = tf.linalg.slogdet(K_obj_obj)
        _, logdet_K_X_X = tf.linalg.slogdet(K_X_X)
        _, logdet_K_combined = tf.linalg.slogdet(K_combined)

        # MI formula
        return logdet_K_obj_obj + logdet_K_X_X - logdet_K_combined

    def update(self, kernel: gpflow.kernels.Kernel,
               noise_variance: float) -> None:
        """
        Updates the kernel and noise variance for the MI objective.
        This method is crucial for optimizing the GP hyperparameters externally
        and having the objective function reflect those changes.

        Args:
            kernel (gpflow.kernels.Kernel): The updated GPflow kernel function.
            noise_variance (float): The updated data noise variance.
        """
        # Copy the values of the new kernel's trainable variables into the
        # stored kernel's variables. Variables are paired positionally and
        # `zip` stops at the shorter sequence, so both kernels are expected to
        # expose the same variables in the same order.
        for self_var, var in zip(self.kernel.trainable_variables,
                                 kernel.trainable_variables):
            self_var.assign(var)

        self.noise_variance = noise_variance
        # Rebuild the jitter function so it reflects the new noise variance
        self.jitter_fn = lambda cov: jitter_fn(
            cov, jitter=self._base_jitter + self.noise_variance)

__call__(X)

Computes the Mutual Information for the given input points X.

Parameters:

Name Type Description Default
X Tensor

The input points (e.g., sensing locations) for which MI is to be computed. Shape: (M, D).

required

Returns:

Type Description
Tensor

tf.Tensor: The computed Mutual Information value.

Usage
import gpflow
import numpy as np
# Assume X_objective and kernel are defined
# X_objective = np.random.rand(100, 2)
# kernel = gpflow.kernels.SquaredExponential()
# noise_variance = 0.1

mi_objective = MI(X_objective=X_objective, kernel=kernel, noise_variance=noise_variance)
X_sensing = tf.constant(np.random.rand(10, 2), dtype=tf.float64)
mi_value = mi_objective(X_sensing)
Source code in sgptools/objectives.py
def __call__(self, X: tf.Tensor) -> tf.Tensor:
    """
    Computes the Mutual Information for the given input points `X`.

    The three log-determinants are evaluated in log-space so that determinants
    too small or too large to represent do not turn the result into
    -inf/inf/NaN.

    Args:
        X (tf.Tensor): The input points (e.g., sensing locations) for which
                       MI is to be computed. Shape: (M, D).

    Returns:
        tf.Tensor: The computed Mutual Information value.

    Usage:
        ```python
        import gpflow
        import numpy as np
        import tensorflow as tf
        # Assume X_objective and kernel are defined
        # X_objective = np.random.rand(100, 2)
        # kernel = gpflow.kernels.SquaredExponential()
        # noise_variance = 0.1

        mi_objective = MI(X_objective=X_objective, kernel=kernel, noise_variance=noise_variance)
        X_sensing = tf.constant(np.random.rand(10, 2), dtype=tf.float64)
        mi_value = mi_objective(X_sensing)
        ```
    """
    # K(X_objective, X_objective)
    K_obj_obj = self.jitter_fn(self.kernel(self.X_objective))
    # K(X, X)
    K_X_X = self.jitter_fn(self.kernel(X))
    # K(X_objective U X, X_objective U X)
    K_combined = self.jitter_fn(
        self.kernel(tf.concat([self.X_objective, X], axis=0)))

    # `tf.linalg.slogdet` returns (sign, log|det|). Taking log(det(.)) instead
    # breaks down once the determinant itself underflows to 0 or overflows.
    # The sign is discarded: for a positive determinant log|det| == log(det).
    _, logdet_K_obj_obj = tf.linalg.slogdet(K_obj_obj)
    _, logdet_K_X_X = tf.linalg.slogdet(K_X_X)
    _, logdet_K_combined = tf.linalg.slogdet(K_combined)

    # MI formula
    return logdet_K_obj_obj + logdet_K_X_X - logdet_K_combined

__init__(X_objective, kernel, noise_variance, jitter=1e-06, **kwargs)

Initializes the Mutual Information (MI) objective.

Parameters:

Name Type Description Default
X_objective ndarray

The fixed set of data points (e.g., candidate locations or training data points) against which MI is computed. Shape: (N, D).

required
kernel Kernel

The GPflow kernel function to compute covariances.

required
noise_variance float

The observed data noise variance, which is added to the jitter.

required
jitter float

A small positive value to add for numerical stability to covariance matrix diagonals. Defaults to 1e-6.

1e-06
**kwargs Any

Arbitrary keyword arguments.

{}
Source code in sgptools/objectives.py
def __init__(self,
             X_objective: np.ndarray,
             kernel: gpflow.kernels.Kernel,
             noise_variance: float,
             jitter: float = 1e-6,
             **kwargs: Any):
    """
    Set up the Mutual Information (MI) objective.

    Args:
        X_objective (np.ndarray): Fixed reference points (e.g., candidate locations
                                  or training data points) that MI is measured against.
                                  Shape: (N, D).
        kernel (gpflow.kernels.Kernel): GPflow kernel used to build the covariance matrices.
        noise_variance (float): Observation noise variance; it is summed with `jitter`
                                when stabilizing covariance matrices.
        jitter (float): Small positive constant passed on for numerical stability of the
                        covariance matrix diagonals. Defaults to 1e-6.
        **kwargs: Arbitrary keyword arguments; accepted but not used by this method.
    """
    # Keep the reference points as a float64 constant tensor.
    self.X_objective = tf.constant(X_objective, dtype=tf.float64)
    self.kernel = kernel
    self.noise_variance = noise_variance
    self._base_jitter = jitter

    def _stabilize(cov):
        # Both attributes are read when the closure runs, so the effective
        # jitter always tracks the current noise variance.
        return jitter_fn(cov,
                         jitter=self._base_jitter + self.noise_variance)

    self.jitter_fn = _stabilize

update(kernel, noise_variance)

Updates the kernel and noise variance for the MI objective. This method is crucial for optimizing the GP hyperparameters externally and having the objective function reflect those changes.

Parameters:

Name Type Description Default
kernel Kernel

The updated GPflow kernel function.

required
noise_variance float

The updated data noise variance.

required
Source code in sgptools/objectives.py
def update(self, kernel: gpflow.kernels.Kernel,
           noise_variance: float) -> None:
    """
    Refresh the objective with new kernel hyperparameters and noise variance.

    Use this after the GP hyperparameters have been optimized elsewhere so
    that subsequent MI evaluations reflect the new values.

    Args:
        kernel (gpflow.kernels.Kernel): Kernel holding the updated hyperparameters.
        noise_variance (float): The updated data noise variance.
    """
    # Copy values variable-by-variable into the kernel this objective already
    # holds. Pairs are matched positionally; zip stops at the shorter sequence.
    own_vars = self.kernel.trainable_variables
    new_vars = kernel.trainable_variables
    for target, source in zip(own_vars, new_vars):
        target.assign(source)

    self.noise_variance = noise_variance

    def _stabilize(cov):
        # Effective jitter = base jitter + current noise variance.
        return jitter_fn(cov,
                         jitter=self._base_jitter + self.noise_variance)

    # Reinstall the jitter function so it reflects the new noise variance.
    self.jitter_fn = _stabilize