Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a naive sparse histogram. #446

Merged
merged 4 commits into from
Jan 6, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 46 additions & 58 deletions src/hats/pixel_math/sparse_histogram.py
Original file line number Diff line number Diff line change
@@ -1,101 +1,89 @@
"""Sparse 1-D histogram of healpix pixel counts."""

import numpy as np
from scipy.sparse import csc_array, load_npz, save_npz, sparray

import hats.pixel_math.healpix_shim as hp


class SparseHistogram:
"""Wrapper around scipy's sparse array."""
"""Wrapper around a naive sparse array, that is just non-zero indexes and counts.

def __init__(self, sparse_array):
if not isinstance(sparse_array, sparray):
raise ValueError("The sparse array must be a scipy sparse array.")
if sparse_array.format != "csc":
raise ValueError("The sparse array must be a Compressed Sparse Column array.")
self.sparse_array = sparse_array
e.g. for a dense 1-d numpy histogram of order 0, you might see::

def add(self, other):
"""Add in another sparse histogram, updating this wrapper's array.
[0, 4, 0, 0, 0, 0, 0, 0, 9, 0, 0]

Args:
other (SparseHistogram): the wrapper containing the addend
"""
if not isinstance(other, SparseHistogram):
raise ValueError("Both addends should be SparseHistogram.")
if self.sparse_array.shape != other.sparse_array.shape:
raise ValueError(
"The histogram partials have incompatible sizes due to different healpix orders."
)
self.sparse_array += other.sparse_array
There are only elements at [1, 8], and they have respective values [4, 9]. You
would create the sparse histogram like::

SparseHistogram([1, 8], [4, 9], 0)
"""

def __init__(self, indexes, counts, order):
if len(indexes) != len(counts):
raise ValueError("indexes and counts must be same length")
self.indexes = indexes
self.counts = counts
self.order = order

def to_array(self):
"""Convert the sparse array to a dense numpy array.

Returns:
dense 1-d numpy array.
"""
return self.sparse_array.toarray()[0]
dense = np.zeros(hp.order2npix(self.order), dtype=np.int64)
dense[self.indexes] = self.counts
return dense

def to_file(self, file_name):
"""Persist the sparse array to disk.

NB: this saves as a sparse array, and so will likely have lower space requirements
than saving the corresponding dense 1-d numpy array.
"""
save_npz(file_name, self.sparse_array)
np.savez(file_name, indexes=self.indexes, counts=self.counts, order=self.order)

def to_dense_file(self, file_name):
"""Persist the DENSE array to disk as a numpy array."""
with open(file_name, "wb+") as file_handle:
file_handle.write(self.to_array().data)

@classmethod
def make_empty(cls, healpix_order=10):
"""Create an empty sparse array for a given healpix order.

Args:
healpix_order (int): healpix order
def from_file(cls, file_name):
"""Read sparse histogram from a file.

Returns:
new sparse histogram
"""
histo = csc_array((1, hp.order2npix(healpix_order)), dtype=np.int64)
return cls(histo)

@classmethod
def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
"""Create an sparse array for a given healpix order, prefilled with counts at
the provided indexes.
npzfile = np.load(file_name)
return cls(npzfile["indexes"], npzfile["counts"], npzfile["order"])

e.g. for a dense 1-d numpy histogram of order 0, you might see::

[0, 4, 0, 0, 0, 0, 0, 0, 9, 0, 0]
class HistogramAggregator:
"""Utility for aggregating sparse histograms."""

There are only elements at [1, 8], and they have respective values [4, 9]. You
would create the sparse histogram like::
def __init__(self, order):
self.order = order
self.full_histogram = np.zeros(hp.order2npix(order), dtype=np.int64)

make_from_counts([1, 8], [4, 9], 0)
def add(self, other):
"""Add in another sparse histogram, updating this wrapper's array.

Args:
indexes (int[]): index locations of non-zero values
counts_at_indexes (int[]): values at the ``indexes``
healpix_order (int): healpix order

Returns:
new sparse histogram
"""
row = np.array(np.zeros(len(indexes), dtype=np.int64))
histo = csc_array((counts_at_indexes, (row, indexes)), shape=(1, hp.order2npix(healpix_order)))
return cls(histo)

@classmethod
def from_file(cls, file_name):
"""Read sparse histogram from a file.

Returns:
new sparse histogram
other (SparseHistogram): the wrapper containing the addend
"""
histo = load_npz(file_name)
return cls(histo)
if not isinstance(other, SparseHistogram):
raise ValueError("Both addends should be SparseHistogram.")
if self.order != other.order:
raise ValueError(
"The histogram partials have incompatible sizes due to different healpix orders."
)
if len(other.indexes) == 0:
return
self.full_histogram[other.indexes] += other.counts

def to_sparse(self):
"""Return a SparseHistogram, based on non-zero values in this aggregation."""
indexes = self.full_histogram.nonzero()[0]
counts = self.full_histogram[indexes]
return SparseHistogram(indexes, counts, self.order)
50 changes: 24 additions & 26 deletions tests/hats/pixel_math/test_sparse_histogram.py
Original file line number Diff line number Diff line change
@@ -4,22 +4,21 @@
import numpy.testing as npt
import pytest
from numpy import frombuffer
from scipy.sparse import csr_array

import hats.pixel_math.healpix_shim as hp
from hats.pixel_math.sparse_histogram import SparseHistogram
from hats.pixel_math.sparse_histogram import HistogramAggregator, SparseHistogram


def test_make_empty():
"""Tests the initialization of an empty histogram at the specified order"""
histogram = SparseHistogram.make_empty(5)
histogram = SparseHistogram([], [], 5)
expected_hist = np.zeros(hp.order2npix(5))
npt.assert_array_equal(expected_hist, histogram.to_array())


def test_read_write_round_trip(tmp_path):
"""Test that we can read what we write into a histogram file."""
histogram = SparseHistogram.make_from_counts([11], [131], 0)
histogram = SparseHistogram([11], [131], 0)

# Write as a sparse array
file_name = tmp_path / "round_trip_sparse.npz"
@@ -38,43 +37,42 @@ def test_read_write_round_trip(tmp_path):
def test_add_same_order():
"""Test that we can add two histograms created from the same order, and get
the expected results."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)
partial_histogram_left = SparseHistogram([11], [131], 0)

partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 0)
partial_histogram_right = SparseHistogram([10, 11], [4, 15], 0)

partial_histogram_left.add(partial_histogram_right)
total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
total_histogram.add(partial_histogram_right)

expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 146]
npt.assert_array_equal(partial_histogram_left.to_array(), expected)
npt.assert_array_equal(total_histogram.full_histogram, expected)

sparse = total_histogram.to_sparse()
npt.assert_array_equal(sparse.indexes, [10, 11])
npt.assert_array_equal(sparse.counts, [4, 146])
npt.assert_array_equal(sparse.order, 0)


def test_add_different_order():
"""Test that we can NOT add histograms of different healpix orders."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)

partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 1)
partial_histogram_left = SparseHistogram([11], [131], 0)
partial_histogram_right = SparseHistogram([10, 11], [4, 15], 1)

total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
with pytest.raises(ValueError, match="partials have incompatible sizes"):
partial_histogram_left.add(partial_histogram_right)
total_histogram.add(partial_histogram_right)


def test_add_different_type():
"""Test that we can NOT add histograms of different healpix orders."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)
partial_histogram_left = SparseHistogram([11], [131], 0)

total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
with pytest.raises(ValueError, match="addends should be SparseHistogram"):
partial_histogram_left.add(5)
total_histogram.add(5)

with pytest.raises(ValueError, match="addends should be SparseHistogram"):
partial_histogram_left.add([1, 2, 3, 4, 5])


def test_init_bad_inputs():
"""Test that the SparseHistogram type requires a compressed sparse column
as its sole `sparse_array` argument."""
with pytest.raises(ValueError, match="must be a scipy sparse array"):
SparseHistogram(5)

with pytest.raises(ValueError, match="must be a Compressed Sparse Column"):
row_sparse_array = csr_array((1, 12), dtype=np.int64)
SparseHistogram(row_sparse_array)
total_histogram.add(([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], 0))