Skip to content

Commit

Permalink
Fix and rename df_to_(svelte->html)_table (#97)
Browse files Browse the repository at this point in the history
* fix broken nature links

* fix bad script or default_script string replacement

* move DEFAULT_DF_STYLES to module scope

* df_to_pdf change default size = f"{n_cols * 4}cm {n_rows * 2}cm"

* df_to_svelte_table use same default_styles as df_to_pdf

* rename df_to_(svelte->html)_table

* rename default_styles to styler_css and support type dict[str, str]

* raise ValueError(f"Got {inline_props=} but no '<table ...' tag found in HTML string to ...")

* fix test_df_to_pdf
  • Loading branch information
janosh authored Oct 28, 2023
1 parent 62cd125 commit 661ba69
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 46 deletions.
2 changes: 1 addition & 1 deletion dataset_exploration/boltztrap_mp/explore_boltztrap_mp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
Reference:
Ricci, F. et al. An ab initio electronic transport database for inorganic materials.
https://nature.com/articles/sdata201785
https://www.nature.com/articles/sdata201785
Dryad Digital Repository. https://doi.org/10.5061/dryad.gn001
https://hackingmaterials.lbl.gov/matminer/dataset_summary.html
Expand Down
6 changes: 3 additions & 3 deletions dataset_exploration/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

The majority of datasets explored in this directory are from the [`matbench`](https://matbench.materialsproject.org) collection. Others include:

- [`ricci_carrier_transport`](https://hackingmaterials.lbl.gov/matminer/dataset_summary): [Electronic Transport Properties by F. Ricci et al.][carrier_transport] from MPContribs which contains 48,000 DFT Seebeck coefficients ([Paper](https://nature.com/articles/sdata201785)). [[Download link][carrier_transport.json.gz] (from [here](https://git.io/JOMwY))].
- [`ricci_carrier_transport`](https://hackingmaterials.lbl.gov/matminer/dataset_summary): [Electronic Transport Properties by F. Ricci et al.][carrier_transport] from MPContribs which contains 48,000 DFT Seebeck coefficients ([Paper](https://www.nature.com/articles/sdata201785)). [[Download link][carrier_transport.json.gz] (from [here](https://git.io/JOMwY))].
- [`boltztrap_mp`](https://hackingmaterials.lbl.gov/matminer/dataset_summary) which contains ~9000 effective mass and thermoelectric properties calculated by the BoltzTraP software package.
- [`tri_camd_2022`](https://data.matr.io/7): Toyota Research Institute's 2nd active learning crystal discovery dataset from Computational Autonomy for
Materials Discovery (CAMD)
- `WBM`: From the paper [Predicting stable crystalline compounds using chemical similarity](https://nature.com/articles/s41524-020-00481-6) published Jan 26, 2021 in Nature. A dataset generated with DFT building on earlier work by some of the same authors published in [The optimal one dimensional periodic table: a modified Pettifor chemical scale from data mining](https://doi.org/10.1088/1367-2630/18/9/093011). Kindly shared by the author Hai-Chen Wang on email request.
- `WBM`: From the paper [Predicting stable crystalline compounds using chemical similarity](https://www.nature.com/articles/s41524-020-00481-6) published Jan 26, 2021 in Nature. A dataset generated with DFT building on earlier work by some of the same authors published in [The optimal one dimensional periodic table: a modified Pettifor chemical scale from data mining](https://doi.org/10.1088/1367-2630/18/9/093011). Kindly shared by the author Hai-Chen Wang on email request.

## [MatBench v0.1](https://matbench.materialsproject.org)

Expand All @@ -15,7 +15,7 @@ Materials Discovery (CAMD)
> MatBench is an [ImageNet](http://www.image-net.org) for materials science; a set of 13 supervised, pre-cleaned, ready-to-use ML tasks for benchmarking and fair comparison. The tasks span across the domain of inorganic materials science applications.
To browse these datasets online, go to [ml.materialsproject.org] and log in.
Datasets were originally published in <https://nature.com/articles/s41524-020-00406-3>.
Datasets were originally published in <https://www.nature.com/articles/s41524-020-00406-3>.

Detailed information about how each dataset was created and prepared for use is available at <https://hackingmaterials.lbl.gov/matminer/dataset_summary.html>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
Reference:
Ricci, F. et al. An ab initio electronic transport database for inorganic materials.
https://nature.com/articles/sdata201785
https://www.nature.com/articles/sdata201785
Dryad Digital Repository. https://doi.org/10.5061/dryad.gn001
Extensive column descriptions and metadata at
Expand Down
74 changes: 47 additions & 27 deletions pymatviz/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from os.path import dirname
from shutil import which
from time import sleep
from typing import TYPE_CHECKING, Any, Literal
from typing import TYPE_CHECKING, Any, Final, Literal

import matplotlib.pyplot as plt
import plotly.graph_objects as go
Expand Down Expand Up @@ -146,13 +146,21 @@ def save_and_compress_svg(
subprocess.run([svgo, "--multipass", filepath], check=True)


# Default CSS rules applied to a pandas Styler by df_to_pdf and df_to_html_table
# when styler_css=True. Keys are CSS selectors, values are CSS property strings;
# each item is passed to Styler.set_table_styles as dict(selector=..., props=...).
# NOTE(review): the "" selector presumably targets the table element itself —
# confirm against the pandas Styler.set_table_styles documentation.
DEFAULT_DF_STYLES: Final = {
"": "font-family: sans-serif; border-collapse: collapse;",
"td, th": "border: none; padding: 4px 6px; white-space: nowrap;",
"th.col_heading": "border: 1px solid; border-width: 1px 0; text-align: left;",
"th.row_heading": "font-weight: normal; padding: 3pt;",
}


def df_to_pdf(
styler: Styler,
file_path: str | Path,
crop: bool = True,
size: str | None = None,
style: str = "",
default_styles: bool = True,
styler_css: bool | dict[str, str] = True,
**kwargs: Any,
) -> None:
"""Export a pandas Styler to PDF with WeasyPrint.
Expand All @@ -163,12 +171,14 @@ def df_to_pdf(
crop (bool): Whether to crop the PDF margins. Requires pdfCropMargins.
Defaults to True. Be careful to set size correctly (not much too large as
is the default) if you set crop=False.
size (str): Page size. Defaults to "100cm". See
https://developer.mozilla.org/@page for 'landscape' and other options.
size (str): Page size. Defaults to "4cm * n_cols x 2cm * n_rows"
(width x height). See https://developer.mozilla.org/@page for 'landscape'
and other options.
style (str): CSS style string to be inserted into the HTML file.
Defaults to "".
default_styles (bool): Whether to apply some sensible default CSS.
Defaults to True.
styler_css (bool | dict[str, str]): Whether to apply some sensible default CSS
to the pandas Styler. Defaults to True. If dict, keys are selectors and
values CSS strings. Example: {"td, th": "border: none; padding: 4px 6px;"}
**kwargs: Keyword arguments passed to Styler.to_html().
"""
try:
Expand All @@ -177,25 +187,18 @@ def df_to_pdf(
msg = "weasyprint not installed\nrun pip install weasyprint"
raise ImportError(msg) from exc

if default_styles:
# Apply default styles
styles = {
"": "font-family: sans-serif; border-collapse: collapse;",
"td, th": "border: none; padding: 4px 6px; white-space: nowrap;",
"th.col_heading": "border: 1px solid; border-width: 1px 0; "
"text-align: left;",
"th.row_heading": "font-weight: normal; padding: 3pt;",
}
if styler_css:
styler_css = styler_css if isinstance(styler_css, dict) else DEFAULT_DF_STYLES
styler.set_table_styles(
[dict(selector=sel, props=styles[sel]) for sel in styles]
[dict(selector=sel, props=val) for sel, val in styler_css.items()]
)
styler.set_uuid("")

styler.set_uuid("")
html_str = styler.to_html(**kwargs)

if size is None:
n_rows, n_cols = styler.data.shape
size = f"{n_cols * 3}cm {n_rows * 1}cm"
size = f"{n_cols * 4}cm {n_rows * 2}cm"

# CSS to adjust layout and margins
html_str = f"""
Expand Down Expand Up @@ -269,12 +272,13 @@ def normalize_and_crop_pdf(
raise RuntimeError("Error cropping PDF margins") from exc


def df_to_svelte_table(
def df_to_html_table(
styler: Styler,
file_path: str | Path,
inline_props: str = "",
inline_props: str | None = "",
script: str | None = "",
styles: str | None = "table { overflow: scroll; max-width: 100%; display: block; }",
styler_css: bool | dict[str, str] = True,
**kwargs: Any,
) -> None:
"""Convert a pandas Styler to a svelte table.
Expand All @@ -284,12 +288,16 @@ def df_to_svelte_table(
file_path (str): Path to the file to write the svelte table to.
inline_props (str): Inline props to pass to the table element. Example:
"class='table' style='width: 100%'". Defaults to "".
script (str): JavaScript to insert above the table. Will replace the opening
`<table` tag to allow passing props to it. Uses ...props to allow for
Svelte props forwarding to the table element. See source code for lengthy
default script.
script (str): JavaScript string to insert above the table. Will replace the
opening `<table` tag to allow passing props to it. The default script uses
...props to allow for Svelte props forwarding to the table element. See
source code to inspect default script. Don't forget to include '<table'
somewhere in the script. Defaults to "".
styles (str): CSS rules to add to the table styles. Defaults to
`table { overflow: scroll; max-width: 100%; display: block; }`.
styler_css (bool | dict[str, str]): Whether to apply some sensible default CSS
to the pandas Styler. Defaults to True. If dict, keys are selectors and
values CSS strings. Example: {"td, th": "border: none; padding: 4px 6px;"}
**kwargs: Keyword arguments passed to Styler.to_html().
"""
default_script = """<script lang="ts">
Expand All @@ -298,13 +306,25 @@ def df_to_svelte_table(
<table use:sortable {...$$props}
"""

styler.set_uuid("")
if styler_css:
styler_css = styler_css if isinstance(styler_css, dict) else DEFAULT_DF_STYLES
styler.set_table_styles(
[dict(selector=sel, props=val) for sel, val in styler_css.items()]
)
html = styler.to_html(**kwargs)
if script:
html = html.replace("<table", f"{script or default_script}")
if inline_props:
if "<table " not in html:
raise ValueError(
f"Got {inline_props=} but no '<table ...' tag found in HTML string to "
"attach to"
)
html = html.replace("<table", f"<table {inline_props}")
if script is not None:
html = html.replace("<table", f"<table {script or default_script}")
if styles is not None:
# insert styles at end of closing </style> tag so they override default styles
html = html.replace("</style>", f"{styles}</style>")
html = html.replace("</style>", f"{styles}\n</style>")
with open(file_path, "w") as file:
file.write(html)
40 changes: 26 additions & 14 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pytest
from matplotlib import pyplot as plt

from pymatviz.io import df_to_pdf, df_to_svelte_table, normalize_and_crop_pdf, save_fig
from pymatviz.io import df_to_html_table, df_to_pdf, normalize_and_crop_pdf, save_fig


if TYPE_CHECKING:
Expand Down Expand Up @@ -67,7 +67,7 @@ def test_plotly_pdf_no_mathjax_loading(tmp_path: Path) -> None:
PyPDF2 = pytest.importorskip("PyPDF2")

fig = go.Figure()
fig.add_trace(go.Scatter(x=[1, 2], y=[3, 4]))
fig.add_scatter(x=[1, 2], y=[3, 4])
path = f"{tmp_path}/test.pdf"
save_fig(fig, path)

Expand All @@ -83,7 +83,7 @@ def test_plotly_pdf_no_mathjax_loading(tmp_path: Path) -> None:
# https://stackoverflow.com/a/69816601
@pytest.mark.skipif(sys.platform == "win32", reason="fails on Windows")
@pytest.mark.parametrize(
"crop, size, style, default_styles",
"crop, size, style, styler_css",
[
# test with cropping, default size, and no extra style
# TODO test crop=True in CI, kept failing with FileNotFoundError: No such file
Expand All @@ -98,7 +98,7 @@ def test_df_to_pdf(
crop: bool,
size: str,
style: str,
default_styles: bool,
styler_css: bool,
tmp_path: Path,
capsys: pytest.CaptureFixture[str],
) -> None:
Expand All @@ -121,7 +121,7 @@ def test_df_to_pdf(
crop=crop,
size=size,
style=style,
default_styles=default_styles,
styler_css=styler_css,
)
try:
df_to_pdf(**kwds)
Expand Down Expand Up @@ -177,26 +177,37 @@ def test_normalize_and_crop_pdf(


@pytest.mark.parametrize(
"script, styles, inline_props",
"script, styles, inline_props, styler_css",
[
(None, None, ""),
("", "body { margin: 0; padding: 1em; }", "class='table'"),
(None, None, "", False),
(None, "", None, False),
("", "body { margin: 0; padding: 1em; }", "<table class='table'", True),
(
"import { sortable } from 'svelte-zoo/actions'",
"<script>import { sortable } from 'svelte-zoo/actions'<s/script><table",
"body { margin: 0; padding: 1em; }",
"style='width: 100%'",
{"tb, th, td": "border: 1px solid black;"},
),
],
)
def test_df_to_svelte_table(
tmp_path: Path, script: str, styles: str, inline_props: str
def test_df_to_html_table(
tmp_path: Path,
script: str | None,
styles: str | None,
inline_props: str,
styler_css: bool | dict[str, str],
) -> None:
df = pd._testing.makeMixedDataFrame()

file_path = tmp_path / "test_df.svelte"

df_to_svelte_table(
df.style, file_path, script=script, styles=styles, inline_props=inline_props
df_to_html_table(
df.style,
file_path,
script=script,
styles=styles,
inline_props=inline_props,
styler_css=styler_css,
)

assert file_path.is_file()
Expand All @@ -205,8 +216,9 @@ def test_df_to_svelte_table(
if script is not None:
assert script in content
if styles is not None:
assert f"{styles}</style>" in content
assert f"{styles}\n</style>" in content
if inline_props:
print(f"{content=}")
assert inline_props in content

# check file contains original dataframe value
Expand Down

0 comments on commit 661ba69

Please sign in to comment.