Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions lib/polars/data_frame.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1848,10 +1848,13 @@ def tail(n = 5)
_from_rbdf(_df.tail(n))
end

# Return a new DataFrame where the null values are dropped.
# Drop all rows that contain one or more null values.
#
# The original order of the remaining rows is preserved.
#
# @param subset [Object]
# Subset of column(s) on which `drop_nulls` will be applied.
# Column name(s) for which null values are considered.
# If set to `nil` (default), use all columns.
#
# @return [DataFrame]
#
Expand All @@ -1860,20 +1863,31 @@ def tail(n = 5)
# {
# "foo" => [1, 2, 3],
# "bar" => [6, nil, 8],
# "ham" => ["a", "b", "c"]
# "ham" => ["a", "b", nil],
# }
# )
# df.drop_nulls
# # =>
# # shape: (2, 3)
# # shape: (1, 3)
# # ┌─────┬─────┬─────┐
# # │ foo ┆ bar ┆ ham │
# # │ --- ┆ --- ┆ --- │
# # │ i64 ┆ i64 ┆ str │
# # ╞═════╪═════╪═════╡
# # │ 1 ┆ 6 ┆ a │
# # │ 3 ┆ 8 ┆ c │
# # └─────┴─────┴─────┘
# @example
# df.drop_nulls(subset: Polars.cs.integer)
# # =>
# # shape: (2, 3)
# # ┌─────┬─────┬──────┐
# # │ foo ┆ bar ┆ ham │
# # │ --- ┆ --- ┆ --- │
# # │ i64 ┆ i64 ┆ str │
# # ╞═════╪═════╪══════╡
# # │ 1 ┆ 6 ┆ a │
# # │ 3 ┆ 8 ┆ null │
# # └─────┴─────┴──────┘
def drop_nulls(subset: nil)
# Delegate to the lazy implementation (forwarding `subset` unchanged) and
# collect the result with the `_eager: true` flag.
lazy.drop_nulls(subset: subset).collect(_eager: true)
end
Expand Down
39 changes: 27 additions & 12 deletions lib/polars/lazy_frame.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3307,37 +3307,52 @@ def unique(maintain_order: true, subset: nil, keep: "first")
_from_rbldf(_ldf.unique(maintain_order, selector_subset, keep))
end

# Drop rows with null values from this LazyFrame.
# Drop all rows that contain one or more null values.
#
# The original order of the remaining rows is preserved.
#
# @param subset [Object]
# Subset of column(s) on which `drop_nulls` will be applied.
# Column name(s) for which null values are considered.
# If set to `nil` (default), use all columns.
#
# @return [LazyFrame]
#
# @example
# df = Polars::DataFrame.new(
# lf = Polars::LazyFrame.new(
# {
# "foo" => [1, 2, 3],
# "bar" => [6, nil, 8],
# "ham" => ["a", "b", "c"]
# "foo": [1, 2, 3],
# "bar": [6, nil, 8],
# "ham": ["a", "b", nil],
# }
# )
# df.lazy.drop_nulls.collect
# lf.drop_nulls.collect
# # =>
# # shape: (2, 3)
# # shape: (1, 3)
# # ┌─────┬─────┬─────┐
# # │ foo ┆ bar ┆ ham │
# # │ --- ┆ --- ┆ --- │
# # │ i64 ┆ i64 ┆ str │
# # ╞═════╪═════╪═════╡
# # │ 1 ┆ 6 ┆ a │
# # │ 3 ┆ 8 ┆ c │
# # └─────┴─────┴─────┘
# @example
# lf.drop_nulls(subset: Polars.cs.integer).collect
# # =>
# # shape: (2, 3)
# # ┌─────┬─────┬──────┐
# # │ foo ┆ bar ┆ ham │
# # │ --- ┆ --- ┆ --- │
# # │ i64 ┆ i64 ┆ str │
# # ╞═════╪═════╪══════╡
# # │ 1 ┆ 6 ┆ a │
# # │ 3 ┆ 8 ┆ null │
# # └─────┴─────┴──────┘
def drop_nulls(subset: nil)
if !subset.nil? && !subset.is_a?(::Array)
subset = [subset]
selector_subset = nil
if !subset.nil?
selector_subset = Utils.parse_list_into_selector(subset)._rbselector
end
_from_rbldf(_ldf.drop_nulls(subset))
_from_rbldf(_ldf.drop_nulls(selector_subset))
end

# Unpivot a DataFrame from wide to long format.
Expand Down