Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Output Python floats #1636

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/install-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ runs:
uses: actions/cache@v4
with:
path: ~/.local # the path depends on the OS
key: poetry-2 # increment to reset cache
key: poetry-4 # increment to reset cache

- name: Install poetry
uses: snok/install-poetry@v1
Expand Down
31 changes: 9 additions & 22 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,23 @@
import platform
from distutils.command.build_ext import build_ext
from distutils.errors import CCompilerError, DistutilsExecError, DistutilsPlatformError

import numpy
import setuptools
from Cython.Build import cythonize
from setuptools.command.build_ext import build_ext
from setuptools.errors import CCompilerError
from setuptools_rust import Binding, RustExtension

try:
from numpy import __version__ as numpy_version
from numpy import get_include
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy"])
from numpy import __version__ as numpy_version
from numpy import get_include

try:
from Cython.Build import cythonize
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "Cython"])
from Cython.Build import cythonize # type: ignore


ext_modules = cythonize(
module_list=[
setuptools.Extension(
"*",
sources=["**/*.pyx"],
include_dirs=[get_include()],
sources=["river/**/*.pyx"],
include_dirs=[numpy.get_include()],
libraries=[] if platform.system() == "Windows" else ["m"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
)
],
compiler_directives={
"language_level": 3,
"binding": True,
"embedsignature": True,
},
Expand All @@ -47,13 +34,13 @@ class ExtBuilder(build_ext):
def run(self):
try:
build_ext.run(self)
except (DistutilsPlatformError, FileNotFoundError):
except (FileNotFoundError):
raise BuildFailed("File not found. Could not compile C extension.")

def build_extension(self, ext):
try:
build_ext.build_extension(self, ext)
except (CCompilerError, DistutilsExecError, DistutilsPlatformError, ValueError):
except (CCompilerError, ValueError):
raise BuildFailed("Could not compile C extension.")


Expand Down
4,059 changes: 2,142 additions & 1,917 deletions poetry.lock

Large diffs are not rendered by default.

32 changes: 19 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
[build-system]
requires = ["poetry-core>=1.0.0", "cython", "numpy", "setuptools", "wheel", "setuptools-rust"]
requires = [
"poetry-core>=1.0.0",
"cython>3",
"numpy>=2.0.0",
"setuptools>70.1.0",
"setuptools-rust",
]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
Expand All @@ -20,7 +26,7 @@ include = [
"river/datasets/*.zip",
"river/stream/*.zip",
"Cargo.toml",
"rust_src/**/*"
"rust_src/**/*",
]

[tool.poetry.build]
Expand All @@ -29,34 +35,34 @@ script = "build.py"

[tool.poetry.dependencies]
python = "^3.9"
numpy = "^1.23.0"
scipy = "^1.12.1"
pandas = "^2.1"
numpy = ">=1.23.0"
scipy = "^1.13.1"
pandas = "^2.2.3"

[tool.poetry.group.dev.dependencies]
graphviz = "^0.20.1"
gymnasium = "^0.29.0"
matplotlib = "^3.0.2"
matplotlib = "^3.8.4"
mypy = "^1.11.1"
pre-commit = "^3.5.0"
pytest = "^7.4.2"
ruff = "^0.4.10"
scikit-learn = "^1.3.1"
scikit-learn = "^1.5.1"
sqlalchemy = "^2.0.22"
sympy = "^1.10.1"
pytest-xdist = {extras = ["psutil"], version = "^3.3.1"}
sympy = "^1.12.1"
pytest-xdist = { extras = ["psutil"], version = "^3.3.1" }
ipykernel = "^6.26.0"
ipython = "^8.17.2"
rich = "^13.6.0"
jupyter = "^1.0.0"
mike = "^2.0.0"
polars = "^0.20.8"
polars = "^1.1.0"

[tool.poetry.group.compat]
optional = true

[tool.poetry.group.compat.dependencies]
scikit-learn = "^1.0.1"
scikit-learn = "^1.5.1"
sqlalchemy = "^2.0.0"

[tool.poetry.group.docs]
Expand Down Expand Up @@ -84,7 +90,7 @@ optional = true

[tool.poetry.group.benchmark.dependencies]
"dominate" = "2.8.0"
"scikit-learn" = "1.3.1"
"scikit-learn" = "1.5.1"
"tabulate" = "0.9.0"
"vowpalwabbit" = "9.9.0"
"watermark" = "2.4.3"
Expand Down Expand Up @@ -161,7 +167,7 @@ module = [
"requests.*",
"gymnasium.*",
"sympy.*",
"polars.*"
"polars.*",
]
ignore_missing_imports = true

Expand Down
4 changes: 2 additions & 2 deletions river/compose/test_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ def test_issue_1253():
>>> model = group1 + group1 * group2
>>> XT = model.transform_many(X)

>>> XT.memory_usage().sum() // 1000
>>> XT.memory_usage().sum().item() // 1000
85

>>> XT.sparse.to_dense().memory_usage().sum() // 1000
>>> XT.sparse.to_dense().memory_usage().sum().item() // 1000
4455

>>> X, y = datasets.make_regression(n_samples=6, n_features=2)
Expand Down
2 changes: 1 addition & 1 deletion river/datasets/synth/anomaly_sine.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@ def __iter__(self):
self._generate_data()

for xi, yi in itertools.zip_longest(self.X, self.y if hasattr(self.y, "__iter__") else []):
yield dict(zip(["sine", "cosine"], xi)), bool(yi)
yield dict(zip(["sine", "cosine"], xi.tolist())), bool(yi)
5 changes: 4 additions & 1 deletion river/datasets/synth/logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ def __iter__(self):
X, Y = self._make_logical(n_tiles=self.n_tiles, shuffle=self.shuffle)

for xi, yi in itertools.zip_longest(X, Y if hasattr(Y, "__iter__") else []):
yield dict(zip(self.feature_names, xi)), dict(zip(self.target_names, yi))
yield (
dict(zip(self.feature_names, xi.tolist())),
dict(zip(self.target_names, yi.tolist())),
)

def _make_logical(self, n_tiles: int = 1, shuffle: bool = True):
"""Make toy dataset"""
Expand Down
2 changes: 1 addition & 1 deletion river/ensemble/streaming_random_patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ class SRPClassifier(BaseSRPEnsemble, base.Classifier):
>>> metric = metrics.Accuracy()

>>> evaluate.progressive_val_score(dataset, model, metric)
Accuracy: 72.17%
Accuracy: 72.77%

Notes
-----
Expand Down
2 changes: 1 addition & 1 deletion river/facto/ffm.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FFMClassifier(FFM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/fm.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FMClassifier(FM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/fwfm.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class FwFMClassifier(FwFM, base.Classifier):
Expand Down
2 changes: 1 addition & 1 deletion river/facto/hofm.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def __init__(

def predict_one(self, x):
x = self._ohe_cat_features(x)
return self._raw_dot(x)
return self._raw_dot(x).item()


class HOFMClassifier(HOFM, base.Classifier):
Expand Down
4 changes: 2 additions & 2 deletions river/forest/adaptive_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ class ARFClassifier(BaseForest, base.Classifier):
>>> metric = metrics.Accuracy()

>>> evaluate.progressive_val_score(dataset, model, metric)
Accuracy: 67.97%
Accuracy: 67.57%

The total number of warnings and drifts detected, respectively
>>> model.n_warnings_detected(), model.n_drifts_detected()
Expand Down Expand Up @@ -849,7 +849,7 @@ class ARFRegressor(BaseForest, base.Regressor):
>>> metric = metrics.MAE()

>>> evaluate.progressive_val_score(dataset, model, metric)
MAE: 0.772113
MAE: 0.793949

"""

Expand Down
2 changes: 1 addition & 1 deletion river/forest/online_extra_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ class OXTRegressor(ExtraTrees, base.Regressor):
>>> metric = metrics.RMSE()

>>> evaluate.progressive_val_score(dataset, model, metric)
RMSE: 3.16212
RMSE: 2.849735

References
----------
Expand Down
8 changes: 4 additions & 4 deletions river/imblearn/chebyshev.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,10 @@ class ChebyshevOverSampler(base.Wrapper, base.Regressor):
... metrics.MAE(),
... print_every=500
... )
[500] MAE: 1.629786
[1,000] MAE: 1.663799
[1,001] MAE: 1.66253
MAE: 1.66253
[500] MAE: 1.64417
[1,000] MAE: 1.676185
[1,001] MAE: 1.674668
MAE: 1.674668

References
----------
Expand Down
2 changes: 1 addition & 1 deletion river/linear_model/bayesian_lin_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def predict_one(self, x, with_dist=False):
"""

# Bishop equation 3.58
y_pred_mean = utils.math.dot(self._m, x)
y_pred_mean = 0.0 if not len(self._m) else utils.math.dot(self._m, x).item()
if not with_dist:
return y_pred_mean

Expand Down
2 changes: 1 addition & 1 deletion river/naive_bayes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def one_hot_encode(y: pd.Series) -> pd.DataFrame:
"""
classes = np.unique(y)
indices = np.searchsorted(classes, y)
indptr = np.hstack((0, np.cumsum(np.in1d(y, classes))))
indptr = np.hstack((0, np.cumsum(np.isin(y, classes))))
data = np.empty_like(indices)
data.fill(1)
return pd.DataFrame.sparse.from_spmatrix(
Expand Down
2 changes: 1 addition & 1 deletion river/optim/initializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class Normal(Initializer):
>>> init = optim.initializers.Normal(mu=0, sigma=1, seed=42)

>>> init(shape=1)
0.496714
np.float64(0.4967141...)

>>> init(shape=2)
array([-0.1382643 , 0.64768854])
Expand Down
2 changes: 1 addition & 1 deletion river/preprocessing/lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def transform_one(self, x):
# Sample empirical topic assignment:
_, components = self._compute_statistics_components(words_indexes_list)

return dict(enumerate(components))
return dict(enumerate(components.tolist()))

def _update_indexes(self, word_list: typing.Iterable[str]):
"""
Expand Down
8 changes: 5 additions & 3 deletions river/preprocessing/scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,12 @@ def learn_many(self, X: pd.DataFrame):
a = old_count / (old_count + new_count)
b = new_count / (old_count + new_count)

self.means[col] = a * old_mean + b * new_mean
self.means[col] = (a * old_mean + b * new_mean).item()
if self.with_std:
self.vars[col] = a * old_var + b * new_var + a * b * (old_mean - new_mean) ** 2
self.counts[col] += new_count
self.vars[col] = (
a * old_var + b * new_var + a * b * (old_mean - new_mean) ** 2
).item()
self.counts[col] += new_count.item()

def transform_many(self, X: pd.DataFrame):
"""Scale a mini-batch of features.
Expand Down
10 changes: 5 additions & 5 deletions river/proba/beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,20 @@ def revert(self, x):
else:
self.beta -= 1

def __call__(self, p: float):
def __call__(self, p: float) -> float:
return (
p ** (self.alpha - 1) * (1 - p) ** (self.beta - 1) / _beta_func(self.alpha, self.beta)
)

def sample(self):
def sample(self) -> float:
return self._rng.betavariate(self.alpha, self.beta)

@property
def mode(self):
def mode(self) -> float:
try:
return (self.alpha - 1) / (self.alpha + self.beta - 2)
except ZeroDivisionError:
return 0.5

def cdf(self, x):
return scipy.special.betainc(self.alpha, self.beta, x)
def cdf(self, x) -> float:
return scipy.special.betainc(self.alpha, self.beta, x).item()
10 changes: 5 additions & 5 deletions river/proba/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def update(self, x, w=1.0):
def revert(self, x, w=1.0):
self._var.revert(x, w)

def __call__(self, x):
def __call__(self, x) -> float:
var = self._var.get()
if var:
try:
Expand All @@ -83,17 +83,17 @@ def __call__(self, x):
return 0.0
return 0.0

def cdf(self, x):
def cdf(self, x) -> float:
try:
return 0.5 * (1.0 + math.erf((x - self.mu) / (self.sigma * math.sqrt(2.0))))
except ZeroDivisionError:
return 0.0

def sample(self):
def sample(self) -> float:
return self._rng.gauss(self.mu, self.sigma)

@property
def mode(self):
def mode(self) -> float:
return self.mu


Expand Down Expand Up @@ -207,7 +207,7 @@ class MultivariateGaussian(base.MultivariateContinuousDistribution):
>>> multi.mu['blue'] == single.mu
True
>>> multi.sigma['blue']['blue'] == single.sigma
True
np.True_

"""

Expand Down
2 changes: 1 addition & 1 deletion river/reco/biased_mf.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def predict_one(self, user, item, x=None):
# Add the dot product of the user and the item latent vectors
y_pred += np.dot(self.u_latents[user], self.i_latents[item])

return y_pred
return y_pred.item()

def learn_one(self, user, item, y, x=None):
# Update the global mean
Expand Down
Loading
Loading