Source code for beorn.structs.halo_catalog

"""Halo catalog data structure."""
from dataclasses import dataclass
import numpy as np
import logging

from .parameters import Parameters
from ..particle_mapping import map_particles_to_mesh

logger = logging.getLogger(__name__)



[docs]
@dataclass
class HaloCatalog:
    """Container for halo positions, masses and associated properties.

    Instances represent all halos at a single given redshift and provide
    filtering and utility methods used by the painting stage.
    """

    positions: np.ndarray
    """Halo positions in 3D space (X, Y, Z coordinates) in units of cMpc => shape=(N, 3)"""

    masses: np.ndarray
    """Halo masses in units of Msun => shape=(N,)"""

    parameters: Parameters
    """The parameters of the simulation, which are used to filter the halo catalog."""

    redshift_index: int = 0
    """The index of the redshift snapshot that this catalog corresponds to. This is used to look up accretion history"""

    redshift: float = None
    """Actual redshift of this snapshot. Preferred over redshift_index for any quantity that needs z."""

    alphas: np.ndarray = None
    """
    Halo mass accretion rate, calculated from the mass history of the halo. If not available, it is set to a default value of 0.79.
    Inputs that provide a mass history will override this value to have the same shape as the masses array.
    """

    def __post_init__(self):
        self.masses = np.asarray(self.masses)
        assert self.positions.shape[0] == self.masses.size, "Halo catalog arrays must have the same length."

        if self.alphas is None:
            logger.info("No alpha values provided, using default value of 0.79 for all halos.")
            self.alphas = np.ones(self.masses.size) * 0.79

        # filter out haloes that are considerd non-star forming
        condition_min = self.masses > self.parameters.source.halo_mass_min
        condition_max = self.masses < self.parameters.source.halo_mass_max
        # logger.debug(f"Removing {np.sum(~(condition_min & condition_max))} halos that are outside the parameter mass range")
        self.positions = self.positions[condition_min & condition_max, :]
        self.masses = self.masses[condition_min & condition_max]
        self.alphas = self.alphas[condition_min & condition_max]


    @property
    def size(self) -> int:
        """
        Returns the number of halos in the catalog.
        """
        return self.masses.size



[docs]
    def get_halo_indices(self, alpha_range: list[float], mass_range: list[float]) -> np.ndarray:
        """Return indices of halos within provided alpha and mass ranges.

        Args:
            alpha_range (list[float]): [alpha_min, alpha_max).
            mass_range (list[float]): [mass_min, mass_max).

        Returns:
            numpy.ndarray: Integer indices of matching halos (may be empty).
        """
        if self.masses.size == 0:
            return []

        alpha_inf, alpha_sup = alpha_range
        mass_inf, mass_sup = mass_range

        # Get the indices of the halos that are within the mass and alpha range
        indices_match = np.where(
            (self.masses >= mass_inf) & (self.masses < mass_sup) &
            (self.alphas >= alpha_inf) & (self.alphas < alpha_sup)
        )[0]
        # in this case where returns two arrays, we only want the first one

        # if indices_match.size != 0:
        #     logger.debug(f"{alpha_range=} and {mass_range=} resulted in matches: {indices_match.size}")
        return indices_match




[docs]
    def to_mesh(self) -> np.ndarray:
        """Rasterize halo positions into a 3D number-count mesh.

        The mesh uses the nearest-neighbor mass-assignment scheme. The returned array represents halo counts
        (number density), not mass.

        Returns:
            numpy.ndarray: 3D float32 array with shape (Ncell, Ncell, Ncell).
        """
        physical_size = self.parameters.simulation.Lbox
        grid_size = self.parameters.simulation.Ncell
        mesh = np.zeros((grid_size, grid_size, grid_size), dtype=np.float32)
        backend = self.parameters.cosmo_sim.particle_mapping_backend
        map_particles_to_mesh(
            mesh, physical_size, self.positions.astype(np.float32),
            mass_assignment="NGP", backend=backend,
        )
        return mesh




    ### methods that create new halo catalogs

[docs]
    def at_indices(self, indices) -> "HaloCatalog":
        """Return a new :class:`HaloCatalog` restricted to the given indices.

        Args:
            indices (array-like): Indices selecting a subset of halos.

        Returns:
            HaloCatalog: New instance containing only the selected halos.
        """
        if indices.size == 0:
            indices = []

        return HaloCatalog(
            positions = self.positions[indices, :],
            masses = self.masses[indices],
            parameters = self.parameters,
            redshift_index = self.redshift_index,
            redshift = self.redshift,
            alphas = self.alphas[indices]
        )




[docs]
    def halo_mass_function(self, bin_count: int = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Compute the halo mass function (dn/dlnM) and Poisson errors.

        Args:
            bin_count (int|None): Number of mass bins. If ``None`` a
                default value proportional to the mass dynamic range is used.

        Returns:
            tuple: ``(bin_edges, hmf, error)`` where ``hmf`` is in
            units of (Mpc/h)^-3 and ``error`` is the Poisson uncertainty.
        """
        Lbox = self.parameters.simulation.Lbox

        if bin_count is None:
            bin_count = int(10 * np.log10(self.masses.max() / self.masses.min()))
        # increase the range a bit to capture all halos
        bin_edges = np.logspace(np.log(self.masses.min() * 0.9), np.log(self.masses.max() * 1.1), bin_count + 1, base=np.e)
        log_spacing = np.log(bin_edges[1]) - np.log(bin_edges[0])
        # digitize the masses into the bins
        indices = np.digitize(self.masses, bin_edges) - 1
        # -1 because digitize returns the index of the bin to the right of the value
        indices, count = np.unique(indices, return_counts=True)
        # now we have the counts in each bin, but only for the bins that have at least one halo
        full_count = np.zeros(bin_count, dtype=int)
        full_count[indices] = count

        # for the halo mass function we want dn / d ln(M) and then the normalization by the box volume
        hmf = full_count / Lbox**3 / log_spacing

        # for the error we assume Poisson statistics
        error = hmf / np.sqrt(full_count)
        assert hmf.size == bin_count, f"{hmf.size=} does not match expected {bin_count=}"
        assert error.size == bin_count, f"{error.size=} does not match expected {bin_count=}"
        return bin_edges, hmf, error