"""Statistical estimators for :class:`~beorn.structs.TemporalCube` outputs.
Usage::
stats = StatisticsEstimator(multi_z_quantities, parameters)
stats.save() # writes igm_stats_<tag>.h5 next to the cube dir
stats.save('my_stats.h5') # explicit path, or format='pickle'
# reuse without re-computing (auto-loads from default path if file exists)
stats2 = StatisticsEstimator(multi_z_quantities, parameters)
results = stats2.results # loads from disk; falls back to compute()
# load raw arrays without a cube object
data = StatisticsEstimator.load('igm_stats_<tag>.h5')
Extend by subclassing and appending to ``_estimators``::
class MyStats(StatisticsEstimator):
_estimators = StatisticsEstimator._estimators + ['variance_signals']
def variance_signals(self) -> dict:
return {'var_dTb': np.var(self.cube.Grid_dTb[:], axis=(1, 2, 3))}
"""
from __future__ import annotations
import pickle
from pathlib import Path
from typing import TYPE_CHECKING
import numpy as np
if TYPE_CHECKING:
from .temporal_cube import TemporalCube
from .parameters import Parameters
[docs]
class StatisticsEstimator:
"""Compute and persist summary statistics from a :class:`~beorn.structs.TemporalCube`.
Each entry in ``_estimators`` names a method on this class. ``compute()``
calls them in order and merges their ``dict`` returns into a single results
dict. Subclasses add new estimators by extending the list and implementing
the corresponding method.
``results`` is a lazy property: on first access it tries to load from
:meth:`default_path`; if no file exists there it calls :meth:`compute`.
Args:
cube: Multi-redshift simulation output.
parameters: Simulation parameters (needed for k-bins, box size, etc.).
"""
_estimators: list[str] = ['global_signals', 'power_spectra']
def __init__(self, cube: TemporalCube | None = None, parameters: Parameters | None = None) -> None:
self.cube = cube
self.parameters = parameters
self._results: dict | None = None
[docs]
@classmethod
def from_file(cls, path: str | Path, format: str = 'hdf5') -> 'StatisticsEstimator':
"""Load a previously saved statistics file without a TemporalCube.
The returned instance has ``cube=None`` and its :attr:`results` are
pre-populated from disk. It can be passed directly to the
``draw_*`` plotting functions.
Args:
path: File written by :meth:`save`.
format: ``'hdf5'`` or ``'pickle'``. Inferred from extension when omitted.
Returns:
StatisticsEstimator: Instance with results loaded from *path*.
Example::
stats = MyStats.from_file('output/igm_stats_py21cmfast_N128_….h5')
beorn.plotting.draw_dTb_signal(ax, stats, color='k')
"""
inst = cls.__new__(cls)
inst.cube = None
inst.parameters = None
inst._results = cls.load(path, format=format)
return inst
# ------------------------------------------------------------------
# Default file path
# ------------------------------------------------------------------
[docs]
def default_path(self, format: str = 'hdf5') -> Path | None:
"""Return the default statistics file path next to the cube directory.
The name is derived from the cube directory by replacing the
``igm_data_`` prefix with ``igm_stats_`` and appending ``.h5`` or
``.pkl``. Returns ``None`` when the cube has no ``_file_path``.
Example:
cube dir → ``output/igm_data_py21cmfast_N128_…_4024dbb5``
stats file → ``output/igm_stats_py21cmfast_N128_…_4024dbb5.h5``
"""
cube_path = getattr(self.cube, '_file_path', None)
if cube_path is None:
return None
stem = Path(cube_path).name
stats_stem = stem.replace('igm_data_', 'igm_stats_', 1) if stem.startswith('igm_data_') else f'igm_stats_{stem}'
ext = '.pkl' if format == 'pickle' else '.h5'
return Path(cube_path).parent / (stats_stem + ext)
# ------------------------------------------------------------------
# Lazy results property
# ------------------------------------------------------------------
@property
def results(self) -> dict:
"""Cached statistics dict. Loads from :meth:`default_path` if the file
exists; otherwise calls :meth:`compute` and caches the result."""
if self._results is None:
path = self.default_path()
if path is not None and path.exists():
self._results = StatisticsEstimator.load(path)
else:
self._results = self.compute()
return self._results
# ------------------------------------------------------------------
# Built-in estimators
# ------------------------------------------------------------------
[docs]
def global_signals(self) -> dict:
"""Global (volume-averaged) signal histories.
Returns:
dict with keys ``z``, ``mean_dTb``, ``mean_Temp``, ``mean_xHII``, ``mean_xal``.
"""
return {
'z': self.cube.z[:],
'mean_dTb': self.cube.global_mean('dTb'),
'mean_Temp': self.cube.global_mean('Temp'),
'mean_xHII': self.cube.global_mean('xHII'),
'mean_xal': self.cube.global_mean('xal'),
}
[docs]
def power_spectra(self) -> dict:
"""Dimensionless 21-cm power spectrum :math:`\\Delta^2_{21}(k, z)`.
Returns:
dict with keys ``k`` (shape ``(n_k,)``) and
``ps_dTb`` (shape ``(n_z, n_k)``).
"""
ps, k = self.cube.power_spectrum(self.cube.Grid_dTb, self.parameters)
mean_dTb = self.cube.global_mean('dTb')
delta2 = ps * k[np.newaxis, :] ** 3 * mean_dTb[:, np.newaxis] ** 2 / (2 * np.pi ** 2)
return {
'k': k,
'ps_dTb': delta2,
}
# ------------------------------------------------------------------
# Orchestration
# ------------------------------------------------------------------
[docs]
def compute(self) -> dict:
"""Run all registered estimators and return a merged results dict."""
results: dict = {}
for name in self._estimators:
results.update(getattr(self, name)())
return results
# ------------------------------------------------------------------
# Persistence
# ------------------------------------------------------------------
[docs]
def save(self, path: str | Path | None = None, format: str = 'hdf5') -> Path:
"""Compute statistics and write them to *path*.
If *path* is omitted, :meth:`default_path` is used. The computed
results are also cached in :attr:`_results` so a subsequent call to
:attr:`results` does not re-compute.
Args:
path: Output file path, or ``None`` to use the default.
format: ``'hdf5'`` (default) or ``'pickle'``.
Returns:
Path: The file that was written.
Raises:
ValueError: If *path* is ``None`` and the cube has no ``_file_path``.
"""
if path is None:
path = self.default_path(format=format)
if path is None:
raise ValueError(
"Cannot derive a default path because the cube has no _file_path. "
"Pass an explicit path to save()."
)
path = Path(path)
results = self.compute()
self._results = results
if format == 'hdf5':
import h5py
with h5py.File(path, 'w') as f:
for key, val in results.items():
f.create_dataset(key, data=np.asarray(val))
elif format == 'pickle':
with open(path, 'wb') as fh:
pickle.dump(results, fh)
else:
raise ValueError(f"Unknown format '{format}'. Supported: 'hdf5', 'pickle'.")
return path
[docs]
@staticmethod
def load(path: str | Path, format: str = 'hdf5') -> dict:
"""Load previously saved statistics from *path*.
The format is inferred from the file extension when *format* is not
given explicitly: ``.h5`` / ``.hdf5`` → ``'hdf5'``; ``.pkl`` /
``.pickle`` → ``'pickle'``.
Args:
path: File written by :meth:`save`.
format: ``'hdf5'`` or ``'pickle'``. Inferred from extension when omitted.
Returns:
dict mapping statistic names to numpy arrays.
"""
path = Path(path)
if path.suffix in ('.pkl', '.pickle'):
format = 'pickle'
elif path.suffix in ('.h5', '.hdf5'):
format = 'hdf5'
if format == 'hdf5':
import h5py
with h5py.File(path, 'r') as f:
return {key: f[key][:] for key in f}
elif format == 'pickle':
with open(path, 'rb') as fh:
return pickle.load(fh)
else:
raise ValueError(f"Unknown format '{format}'. Supported: 'hdf5', 'pickle'.")