Skip to content

Commit 6b16285

Browse files
feat: expose select_exprs method on DataFrame (apache#1271)
* feat: expose select_exprs method on DataFrame * change python doc * ruff linting --------- Co-authored-by: Tim Saucer <[email protected]>
1 parent 08901d5 commit 6b16285

File tree

3 files changed

+50
-0
lines changed

3 files changed

+50
-0
lines changed

python/datafusion/dataframe.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,17 @@ def select_columns(self, *args: str) -> DataFrame:
405405
"""
406406
return self.select(*args)
407407

408+
def select_exprs(self, *args: str) -> DataFrame:
    """Project an arbitrary list of expression strings into a new DataFrame.

    Each string is parsed into a logical plan expression, and the output
    DataFrame has one column for each expression, in the order given.

    Args:
        args: Expression strings to evaluate, such as ``"a + b"`` or
            ``"abs(a - b)"``.

    Returns:
        DataFrame containing one column per expression.
    """
    # ``self.df`` is the internal DataFrame binding, whose ``select_exprs``
    # hands back another internal object; wrap it so callers receive the
    # Python-level DataFrame promised by the return annotation.
    return DataFrame(self.df.select_exprs(*args))
418+
408419
def select(self, *exprs: Expr | str) -> DataFrame:
409420
"""Project arbitrary expressions into a new :py:class:`DataFrame`.
410421

python/tests/test_dataframe.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,38 @@ def test_select(df):
221221
assert result.column(1) == pa.array([1, 2, 3])
222222

223223

224+
def test_select_exprs(df):
    """Verify that ``select_exprs`` parses expression strings and projects them."""
    # Each case pairs the expression strings with the values expected in the
    # two resulting columns.
    cases = [
        (("a + b", "a - b"), ([5, 7, 9], [-3, -3, -3])),
        (("b", "a"), ([4, 5, 6], [1, 2, 3])),
        (("abs(a + b)", "abs(a - b)"), ([5, 7, 9], [3, 3, 3])),
    ]

    for expressions, (first_expected, second_expected) in cases:
        projected = df.select_exprs(*expressions)

        # execute and collect the first (and only) batch
        batch = projected.collect()[0]

        assert batch.column(0) == pa.array(first_expected)
        assert batch.column(1) == pa.array(second_expected)
254+
255+
224256
def test_drop_quoted_columns():
225257
ctx = SessionContext()
226258
batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["ID_For_Students"])

src/dataframe.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,13 @@ impl PyDataFrame {
435435
Ok(Self::new(df))
436436
}
437437

438+
#[pyo3(signature = (*args))]
439+
fn select_exprs(&self, args: Vec<PyBackedStr>) -> PyDataFusionResult<Self> {
440+
let args = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
441+
let df = self.df.as_ref().clone().select_exprs(&args)?;
442+
Ok(Self::new(df))
443+
}
444+
438445
#[pyo3(signature = (*args))]
439446
fn select(&self, args: Vec<PyExpr>) -> PyDataFusionResult<Self> {
440447
let expr: Vec<Expr> = args.into_iter().map(|e| e.into()).collect();

0 commit comments

Comments
 (0)