Skip to content

Commit bb647a5

Browse files
committed
Case evaluation improvements
- Short-circuit evaluation when the selection vector is all false
- Avoid filtering the record batch when the selection vector is all true
1 parent 10a437b commit bb647a5

File tree

1 file changed

+40
-15
lines changed
  • datafusion/physical-expr/src/expressions

1 file changed

+40
-15
lines changed

datafusion/physical-expr/src/expressions/case.rs

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,17 @@ impl CaseExpr {
208208
let mut current_value = new_null_array(&return_type, batch.num_rows());
209209
// We only consider non-null values while comparing with whens
210210
let mut remainder = not(&base_nulls)?;
211+
let mut remainder_count = remainder.true_count();
211212
for i in 0..self.when_then_expr.len() {
212-
let when_value = self.when_then_expr[i]
213-
.0
214-
.evaluate_selection(batch, &remainder)?;
213+
if remainder_count == 0 {
214+
break;
215+
}
216+
217+
let when_value = if remainder_count == batch.num_rows() {
218+
self.when_then_expr[i].0.evaluate(batch)?
219+
} else {
220+
self.when_then_expr[i].0.evaluate_selection(batch, &remainder)?
221+
};
215222
let when_value = when_value.into_array(batch.num_rows())?;
216223
// build boolean array representing which rows match the "when" value
217224
let when_match = compare_with_eq(
@@ -230,13 +237,18 @@ impl CaseExpr {
230237
let when_match = and(&when_match, &remainder)?;
231238

232239
// When no rows available for when clause, skip then clause
233-
if when_match.true_count() == 0 {
240+
let when_match_count = when_match.true_count();
241+
if when_match_count == 0 {
234242
continue;
235243
}
236244

237-
let then_value = self.when_then_expr[i]
238-
.1
239-
.evaluate_selection(batch, &when_match)?;
245+
let then_value = if when_match_count == batch.num_rows() {
246+
self.when_then_expr[i].1.evaluate(batch)?
247+
} else {
248+
self.when_then_expr[i]
249+
.1
250+
.evaluate_selection(batch, &when_match)?
251+
};
240252

241253
current_value = match then_value {
242254
ColumnarValue::Scalar(ScalarValue::Null) => {
@@ -251,6 +263,7 @@ impl CaseExpr {
251263
};
252264

253265
remainder = and_not(&remainder, &when_match)?;
266+
remainder_count -= when_match_count;
254267
}
255268

256269
if let Some(e) = self.else_expr() {
@@ -280,10 +293,13 @@ impl CaseExpr {
280293
// start with nulls as default output
281294
let mut current_value = new_null_array(&return_type, batch.num_rows());
282295
let mut remainder = BooleanArray::from(vec![true; batch.num_rows()]);
296+
let mut remainder_count = batch.num_rows();
283297
for i in 0..self.when_then_expr.len() {
284-
let when_value = self.when_then_expr[i]
285-
.0
286-
.evaluate_selection(batch, &remainder)?;
298+
let when_value = if remainder_count == batch.num_rows() {
299+
self.when_then_expr[i].0.evaluate(batch)?
300+
} else {
301+
self.when_then_expr[i].0.evaluate_selection(batch, &remainder)?
302+
};
287303
let when_value = when_value.into_array(batch.num_rows())?;
288304
let when_value = as_boolean_array(&when_value).map_err(|_| {
289305
internal_datafusion_err!("WHEN expression did not return a BooleanArray")
@@ -297,13 +313,18 @@ impl CaseExpr {
297313
let when_value = and(&when_value, &remainder)?;
298314

299315
// When no rows available for when clause, skip then clause
300-
if when_value.true_count() == 0 {
316+
let when_match_count = when_value.true_count();
317+
if when_match_count == 0 {
301318
continue;
302319
}
303320

304-
let then_value = self.when_then_expr[i]
305-
.1
306-
.evaluate_selection(batch, &when_value)?;
321+
let then_value = if when_match_count == batch.num_rows() {
322+
self.when_then_expr[i].1.evaluate(batch)?
323+
} else {
324+
self.when_then_expr[i]
325+
.1
326+
.evaluate_selection(batch, &when_value)?
327+
};
307328

308329
current_value = match then_value {
309330
ColumnarValue::Scalar(ScalarValue::Null) => {
@@ -320,10 +341,14 @@ impl CaseExpr {
320341
// Tuples that already matched should be filtered out for short-circuit evaluation,
321342
// null values for the current when expr should be kept
322343
remainder = and_not(&remainder, &when_value)?;
344+
remainder_count -= when_match_count;
345+
if remainder_count == 0 {
346+
break;
347+
}
323348
}
324349

325350
if let Some(e) = self.else_expr() {
326-
if remainder.true_count() > 0 {
351+
if remainder_count > 0 {
327352
// keep `else_expr`'s data type and return type consistent
328353
let expr = try_cast(Arc::clone(e), &batch.schema(), return_type.clone())?;
329354
let else_ = expr

0 commit comments

Comments
 (0)