Skip to content

Commit 2de6380

Browse files
committed
Added serialize and deserialize methods to DataFrame
1 parent 65ea390 commit 2de6380

File tree

4 files changed

+107
-11
lines changed

4 files changed

+107
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
## 0.21.2 (unreleased)
22

3-
- Added `serialize` and `deserialize` methods to `LazyFrame`
4-
- Added `serialize` and `deserialize` methods to `Expr`
3+
- Added `serialize` and `deserialize` methods to `DataFrame`, `LazyFrame`, and `Expr`
54
- Added `storage_options` and `retries` options to `sink_ipc` method
65
- Added experimental support for Iceberg
76
- Added experimental `cast_options` option to `scan_parquet` method

ext/polars/src/dataframe/serde.rs

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,30 @@
1-
use crate::exceptions::ComputeError;
1+
use std::io::{BufReader, BufWriter};
2+
3+
use polars::prelude::*;
4+
25
use crate::file::get_file_like;
3-
use crate::{RbDataFrame, RbResult};
6+
use crate::utils::to_rb_err;
7+
use crate::{RbDataFrame, RbPolarsErr, RbResult};
48
use magnus::Value;
5-
use std::io::BufWriter;
69

710
impl RbDataFrame {
8-
// TODO add to Ruby
9-
pub fn serialize_json(&self, rb_f: Value) -> RbResult<()> {
11+
pub fn serialize_binary(&self, rb_f: Value) -> RbResult<()> {
1012
let file = get_file_like(rb_f, true)?;
11-
let writer = BufWriter::new(file);
12-
serde_json::to_writer(writer, &self.df)
13-
.map_err(|err| ComputeError::new_err(err.to_string()))
13+
let mut writer = BufWriter::new(file);
14+
15+
Ok(self
16+
.df
17+
.borrow_mut()
18+
.serialize_into_writer(&mut writer)
19+
.map_err(RbPolarsErr::from)?)
20+
}
21+
22+
pub fn deserialize_binary(rb_f: Value) -> RbResult<Self> {
23+
let file = get_file_like(rb_f, false)?;
24+
let mut file = BufReader::new(file);
25+
26+
DataFrame::deserialize_from_reader(&mut file)
27+
.map(|v| v.into())
28+
.map_err(to_rb_err)
1429
}
1530
}

ext/polars/src/lib.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,14 @@ fn init(ruby: &Ruby) -> RbResult<()> {
158158
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
159159
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
160160
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
161-
class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
161+
class.define_method(
162+
"serialize_binary",
163+
method!(RbDataFrame::serialize_binary, 1),
164+
)?;
165+
class.define_singleton_method(
166+
"deserialize_binary",
167+
function!(RbDataFrame::deserialize_binary, 1),
168+
)?;
162169

163170
let class = module.define_class("RbExpr", ruby.class_object())?;
164171
class.define_method("+", method!(RbExpr::add, 1))?;

lib/polars/data_frame.rb

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,43 @@ def initialize(data = nil, schema: nil, schema_overrides: nil, strict: true, ori
7272
end
7373
end
7474

75+
# Read a serialized DataFrame from a file.
76+
#
77+
# @param source [Object]
78+
# Path to a file or a file-like object (by file-like object, we refer to
79+
# objects that have a `read` method, such as a file handle or `StringIO`).
80+
#
81+
# @return [DataFrame]
82+
#
83+
# @note
84+
# Serialization is not stable across Polars versions: a DataFrame serialized
85+
# in one Polars version may not be deserializable in another Polars version.
86+
#
87+
# @example
88+
# df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]})
89+
# bytes = df.serialize
90+
# Polars::DataFrame.deserialize(StringIO.new(bytes))
91+
# # =>
92+
# # shape: (3, 2)
93+
# # ┌─────┬─────┐
94+
# # │ a ┆ b │
95+
# # │ --- ┆ --- │
96+
# # │ i64 ┆ f64 │
97+
# # ╞═════╪═════╡
98+
# # │ 1 ┆ 4.0 │
99+
# # │ 2 ┆ 5.0 │
100+
# # │ 3 ┆ 6.0 │
101+
# # └─────┴─────┘
102+
def self.deserialize(source)
103+
if Utils.pathlike?(source)
104+
source = Utils.normalize_filepath(source)
105+
end
106+
107+
deserializer = RbDataFrame.method(:deserialize_binary)
108+
109+
_from_rbdf(deserializer.(source))
110+
end
111+
75112
# @private
76113
def self._from_rbdf(rb_df)
77114
df = DataFrame.allocate
@@ -627,6 +664,44 @@ def to_series(index = 0)
627664
Utils.wrap_s(_df.select_at_idx(index))
628665
end
629666

667+
# Serialize this DataFrame to a file or string.
668+
#
669+
# @param file [Object]
670+
# File path or writable file-like object to which the result will be written.
671+
# If set to `nil` (default), the output is returned as a string instead.
672+
#
673+
# @return [Object]
674+
#
675+
# @note
676+
# Serialization is not stable across Polars versions: a DataFrame serialized
677+
# in one Polars version may not be deserializable in another Polars version.
678+
#
679+
# @example
680+
# df = Polars::DataFrame.new(
681+
# {
682+
# "foo" => [1, 2, 3],
683+
# "bar" => [6, 7, 8]
684+
# }
685+
# )
686+
# bytes = df.serialize
687+
# Polars::DataFrame.deserialize(StringIO.new(bytes))
688+
# # =>
689+
# # shape: (3, 2)
690+
# # ┌─────┬─────┐
691+
# # │ foo ┆ bar │
692+
# # │ --- ┆ --- │
693+
# # │ i64 ┆ i64 │
694+
# # ╞═════╪═════╡
695+
# # │ 1 ┆ 6 │
696+
# # │ 2 ┆ 7 │
697+
# # │ 3 ┆ 8 │
698+
# # └─────┴─────┘
699+
def serialize(file = nil)
700+
serializer = _df.method(:serialize_binary)
701+
702+
Utils.serialize_polars_object(serializer, file)
703+
end
704+
630705
# Serialize to JSON representation.
631706
#
632707
# @param file [String]

0 commit comments

Comments
 (0)