Skip to content

Commit

Permalink
Merge branch 'main' into issue19
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentarelbundock committed Jan 15, 2025
2 parents c7273a0 + ba10c35 commit 93aeab2
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 0 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Development

New functions:

* `get_dataset()`
* `fit_sklearn()`, `fit_statsmodels()`, `fit_linearmodels()`

# 0.0.14

* Thanks to Narwhals, marginaleffects can now ingest data frames in multiple formats and convert them to the Polars representation that we need internally. This no longer requires external dependencies like Pandas or DuckDB. Thanks to @artiom-matvei.
Expand Down
2 changes: 2 additions & 0 deletions marginaleffects/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .model_statsmodels import fit_statsmodels
from .model_sklearn import fit_sklearn
from .model_linearmodels import fit_linearmodels
from .utils import get_dataset

__all__ = [
"avg_comparisons",
Expand All @@ -25,4 +26,5 @@
"fit_statsmodels",
"fit_sklearn",
"fit_linearmodels",
"get_dataset",
]
44 changes: 44 additions & 0 deletions marginaleffects/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,47 @@ def wrapper(*args, **kwargs):
return validator(*args, **kwargs)

return wrapper


def get_dataset(dataset: str, docs: bool = False):
    """
    Fetch one of the example datasets hosted on marginaleffects.com.

    Depending on `docs`, either the data themselves are read from a remote
    Parquet file, or the address of the dataset's documentation page is
    returned without reading any data.

    Parameters
    ----------
    dataset : str
        Name of the dataset. Must be one of "affairs", "airbnb",
        "immigration", "military", "thornton".
    docs : bool, optional
        If True, return the documentation URL instead of the dataset.
        Default is False.

    Returns
    -------
    Union[str, pl.DataFrame]
        The documentation URL (a string ending in ".html") when `docs` is
        True; otherwise the result of reading the dataset's ".parquet" file
        with Polars.

    Raises
    ------
    ValueError
        If `dataset` is not one of the supported names.
    """
    # Each value is a URL stem; the ".html" or ".parquet" suffix is appended
    # further down depending on what the caller asked for.
    datasets = {
        "affairs": "https://marginaleffects.com/data/affairs",
        "airbnb": "https://marginaleffects.com/data/airbnb",
        "immigration": "https://marginaleffects.com/data/immigration",
        "military": "https://marginaleffects.com/data/military",
        "thornton": "https://marginaleffects.com/data/thornton",
    }

    base_url = datasets.get(dataset)
    if base_url is None:
        # Unknown name: report the full list of accepted choices.
        raise ValueError(
            f"Invalid dataset choice. Expected one of {list(datasets.keys())}."
        )

    if not docs:
        # `pl` is the Polars module imported at the top of this file.
        return pl.read_parquet(f"{base_url}.parquet")

    return f"{base_url}.html"

0 comments on commit 93aeab2

Please sign in to comment.