# Drop all rows that contain one or more null values.
#
# The original order of the remaining rows is preserved.
#
# @param subset [Object]
#   Column name(s) or selector for which null values are considered.
#   If set to `nil` (default), use all columns.
#
# @return [DataFrame]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [6, nil, 8],
#       "ham" => ["a", "b", nil],
#     }
#   )
#   df.drop_nulls
#   # =>
#   # shape: (1, 3)
#   # ┌─────┬─────┬─────┐
#   # │ foo ┆ bar ┆ ham │
#   # │ --- ┆ --- ┆ --- │
#   # │ i64 ┆ i64 ┆ str │
#   # ╞═════╪═════╪═════╡
#   # │ 1   ┆ 6   ┆ a   │
#   # └─────┴─────┴─────┘
#
# @example
#   df.drop_nulls(subset: Polars.cs.integer)
#   # =>
#   # shape: (2, 3)
#   # ┌─────┬─────┬──────┐
#   # │ foo ┆ bar ┆ ham  │
#   # │ --- ┆ --- ┆ ---  │
#   # │ i64 ┆ i64 ┆ str  │
#   # ╞═════╪═════╪══════╡
#   # │ 1   ┆ 6   ┆ a    │
#   # │ 3   ┆ 8   ┆ null │
#   # └─────┴─────┴──────┘
def drop_nulls(subset: nil)
  # Reuse the lazy implementation so both frame types share one code path,
  # then collect the result right away.
  lazy.drop_nulls(subset: subset).collect(_eager: true)
end
# Drop all rows that contain one or more null values.
#
# The original order of the remaining rows is preserved.
#
# @param subset [Object]
#   Column name(s) or selector for which null values are considered.
#   If set to `nil` (default), use all columns.
#
# @return [LazyFrame]
#
# @example
#   lf = Polars::LazyFrame.new(
#     {
#       "foo" => [1, 2, 3],
#       "bar" => [6, nil, 8],
#       "ham" => ["a", "b", nil],
#     }
#   )
#   lf.drop_nulls.collect
#   # =>
#   # shape: (1, 3)
#   # ┌─────┬─────┬─────┐
#   # │ foo ┆ bar ┆ ham │
#   # │ --- ┆ --- ┆ --- │
#   # │ i64 ┆ i64 ┆ str │
#   # ╞═════╪═════╪═════╡
#   # │ 1   ┆ 6   ┆ a   │
#   # └─────┴─────┴─────┘
#
# @example
#   lf.drop_nulls(subset: Polars.cs.integer).collect
#   # =>
#   # shape: (2, 3)
#   # ┌─────┬─────┬──────┐
#   # │ foo ┆ bar ┆ ham  │
#   # │ --- ┆ --- ┆ ---  │
#   # │ i64 ┆ i64 ┆ str  │
#   # ╞═════╪═════╪══════╡
#   # │ 1   ┆ 6   ┆ a    │
#   # │ 3   ┆ 8   ┆ null │
#   # └─────┴─────┴──────┘
def drop_nulls(subset: nil)
  # `nil` is forwarded untouched (the documented "use all columns" default);
  # any other value goes through `Utils.parse_list_into_selector` and its
  # `_rbselector` is what gets passed on.
  selector_subset =
    unless subset.nil?
      Utils.parse_list_into_selector(subset)._rbselector
    end

  _from_rbldf(_ldf.drop_nulls(selector_subset))
end