|
15 | 15 | // specific language governing permissions and limitations |
16 | 16 | // under the License. |
17 | 17 |
|
18 | | -//! Declaration of built-in (scalar) functions. |
| 18 | +//! Deprecated module. Add new features in scalar_function.rs instead. |
| 19 | +//! |
19 | 20 | //! This module contains built-in functions' enumeration and metadata. |
20 | 21 | //! |
21 | 22 | //! Generally, a function has: |
|
30 | 31 | //! an argument i32 is passed to a function that supports f64, the |
31 | 32 | //! argument is automatically coerced to f64. |
32 | 33 |
|
33 | | -use std::ops::Neg; |
34 | 34 | use std::sync::Arc; |
35 | 35 |
|
36 | | -use arrow::{array::ArrayRef, datatypes::Schema}; |
| 36 | +use arrow::array::ArrayRef; |
37 | 37 | use arrow_array::Array; |
38 | 38 |
|
39 | | -use datafusion_common::{DFSchema, Result, ScalarValue}; |
| 39 | +pub use crate::scalar_function::create_physical_expr; |
| 40 | +use datafusion_common::{Result, ScalarValue}; |
40 | 41 | pub use datafusion_expr::FuncMonotonicity; |
41 | | -use datafusion_expr::{ |
42 | | - type_coercion::functions::data_types, ColumnarValue, ScalarFunctionImplementation, |
43 | | -}; |
44 | | -use datafusion_expr::{Expr, ScalarFunctionDefinition, ScalarUDF}; |
45 | | - |
46 | | -use crate::sort_properties::SortProperties; |
47 | | -use crate::{PhysicalExpr, ScalarFunctionExpr}; |
48 | | - |
49 | | -/// Create a physical (function) expression. |
50 | | -/// This function errors when `args`' can't be coerced to a valid argument type of the function. |
51 | | -pub fn create_physical_expr( |
52 | | - fun: &ScalarUDF, |
53 | | - input_phy_exprs: &[Arc<dyn PhysicalExpr>], |
54 | | - input_schema: &Schema, |
55 | | - args: &[Expr], |
56 | | - input_dfschema: &DFSchema, |
57 | | -) -> Result<Arc<dyn PhysicalExpr>> { |
58 | | - let input_expr_types = input_phy_exprs |
59 | | - .iter() |
60 | | - .map(|e| e.data_type(input_schema)) |
61 | | - .collect::<Result<Vec<_>>>()?; |
62 | | - |
63 | | - // verify that input data types is consistent with function's `TypeSignature` |
64 | | - data_types(&input_expr_types, fun.signature())?; |
65 | | - |
66 | | - // Since we have arg_types, we don't need args and schema. |
67 | | - let return_type = |
68 | | - fun.return_type_from_exprs(args, input_dfschema, &input_expr_types)?; |
69 | | - |
70 | | - let fun_def = ScalarFunctionDefinition::UDF(Arc::new(fun.clone())); |
71 | | - Ok(Arc::new(ScalarFunctionExpr::new( |
72 | | - fun.name(), |
73 | | - fun_def, |
74 | | - input_phy_exprs.to_vec(), |
75 | | - return_type, |
76 | | - fun.monotonicity()?, |
77 | | - ))) |
78 | | -} |
| 42 | +use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; |
79 | 43 |
|
80 | 44 | #[derive(Debug, Clone, Copy)] |
81 | 45 | pub enum Hint { |
@@ -164,309 +128,3 @@ where |
164 | 128 | } |
165 | 129 | }) |
166 | 130 | } |
167 | | - |
168 | | -/// Determines a [`ScalarFunctionExpr`]'s monotonicity for the given arguments |
169 | | -/// and the function's behavior depending on its arguments. |
170 | | -pub fn out_ordering( |
171 | | - func: &FuncMonotonicity, |
172 | | - arg_orderings: &[SortProperties], |
173 | | -) -> SortProperties { |
174 | | - func.iter().zip(arg_orderings).fold( |
175 | | - SortProperties::Singleton, |
176 | | - |prev_sort, (item, arg)| { |
177 | | - let current_sort = func_order_in_one_dimension(item, arg); |
178 | | - |
179 | | - match (prev_sort, current_sort) { |
180 | | - (_, SortProperties::Unordered) => SortProperties::Unordered, |
181 | | - (SortProperties::Singleton, SortProperties::Ordered(_)) => current_sort, |
182 | | - (SortProperties::Ordered(prev), SortProperties::Ordered(current)) |
183 | | - if prev.descending != current.descending => |
184 | | - { |
185 | | - SortProperties::Unordered |
186 | | - } |
187 | | - _ => prev_sort, |
188 | | - } |
189 | | - }, |
190 | | - ) |
191 | | -} |
192 | | - |
193 | | -/// This function decides the monotonicity property of a [`ScalarFunctionExpr`] for a single argument (i.e. across a single dimension), given that argument's sort properties. |
194 | | -fn func_order_in_one_dimension( |
195 | | - func_monotonicity: &Option<bool>, |
196 | | - arg: &SortProperties, |
197 | | -) -> SortProperties { |
198 | | - if *arg == SortProperties::Singleton { |
199 | | - SortProperties::Singleton |
200 | | - } else { |
201 | | - match func_monotonicity { |
202 | | - None => SortProperties::Unordered, |
203 | | - Some(false) => { |
204 | | - if let SortProperties::Ordered(_) = arg { |
205 | | - arg.neg() |
206 | | - } else { |
207 | | - SortProperties::Unordered |
208 | | - } |
209 | | - } |
210 | | - Some(true) => { |
211 | | - if let SortProperties::Ordered(_) = arg { |
212 | | - *arg |
213 | | - } else { |
214 | | - SortProperties::Unordered |
215 | | - } |
216 | | - } |
217 | | - } |
218 | | - } |
219 | | -} |
220 | | - |
221 | | -#[cfg(test)] |
222 | | -mod tests { |
223 | | - use arrow::{ |
224 | | - array::UInt64Array, |
225 | | - datatypes::{DataType, Field}, |
226 | | - }; |
227 | | - use arrow_schema::DataType::Utf8; |
228 | | - |
229 | | - use datafusion_common::cast::as_uint64_array; |
230 | | - use datafusion_common::DataFusionError; |
231 | | - use datafusion_common::{internal_err, plan_err}; |
232 | | - use datafusion_expr::{Signature, Volatility}; |
233 | | - |
234 | | - use crate::expressions::try_cast; |
235 | | - use crate::utils::tests::TestScalarUDF; |
236 | | - |
237 | | - use super::*; |
238 | | - |
239 | | - #[test] |
240 | | - fn test_empty_arguments_error() -> Result<()> { |
241 | | - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); |
242 | | - let udf = ScalarUDF::new_from_impl(TestScalarUDF { |
243 | | - signature: Signature::variadic(vec![Utf8], Volatility::Immutable), |
244 | | - }); |
245 | | - let expr = create_physical_expr_with_type_coercion( |
246 | | - &udf, |
247 | | - &[], |
248 | | - &schema, |
249 | | - &[], |
250 | | - &DFSchema::empty(), |
251 | | - ); |
252 | | - |
253 | | - match expr { |
254 | | - Ok(..) => { |
255 | | - return plan_err!( |
256 | | - "ScalarUDF function {udf:?} does not support empty arguments" |
257 | | - ); |
258 | | - } |
259 | | - Err(DataFusionError::Plan(_)) => { |
260 | | - // Expected: a planning error; fall through to the final `Ok(())` |
261 | | - } |
262 | | - Err(..) => { |
263 | | - return internal_err!( |
264 | | - "ScalarUDF function {udf:?} didn't got the right error with empty arguments"); |
265 | | - } |
266 | | - } |
267 | | - |
268 | | - Ok(()) |
269 | | - } |
270 | | - |
271 | | - // Helper function just for testing. |
272 | | - // Returns `expressions` coerced to types compatible with |
273 | | - // `signature`, if possible. |
274 | | - pub fn coerce( |
275 | | - expressions: &[Arc<dyn PhysicalExpr>], |
276 | | - schema: &Schema, |
277 | | - signature: &Signature, |
278 | | - ) -> Result<Vec<Arc<dyn PhysicalExpr>>> { |
279 | | - if expressions.is_empty() { |
280 | | - return Ok(vec![]); |
281 | | - } |
282 | | - |
283 | | - let current_types = expressions |
284 | | - .iter() |
285 | | - .map(|e| e.data_type(schema)) |
286 | | - .collect::<Result<Vec<_>>>()?; |
287 | | - |
288 | | - let new_types = data_types(¤t_types, signature)?; |
289 | | - |
290 | | - expressions |
291 | | - .iter() |
292 | | - .enumerate() |
293 | | - .map(|(i, expr)| try_cast(expr.clone(), schema, new_types[i].clone())) |
294 | | - .collect::<Result<Vec<_>>>() |
295 | | - } |
296 | | - |
297 | | - // Helper function just for testing. |
298 | | - // Type coercion is normally done in the logical phase, so the test must perform it explicitly |
299 | | - fn create_physical_expr_with_type_coercion( |
300 | | - fun: &ScalarUDF, |
301 | | - input_phy_exprs: &[Arc<dyn PhysicalExpr>], |
302 | | - input_schema: &Schema, |
303 | | - args: &[Expr], |
304 | | - input_dfschema: &DFSchema, |
305 | | - ) -> Result<Arc<dyn PhysicalExpr>> { |
306 | | - let type_coerced_phy_exprs = |
307 | | - coerce(input_phy_exprs, input_schema, fun.signature()).unwrap(); |
308 | | - create_physical_expr( |
309 | | - fun, |
310 | | - &type_coerced_phy_exprs, |
311 | | - input_schema, |
312 | | - args, |
313 | | - input_dfschema, |
314 | | - ) |
315 | | - } |
316 | | - |
317 | | - fn dummy_function(args: &[ArrayRef]) -> Result<ArrayRef> { |
318 | | - let result: UInt64Array = |
319 | | - args.iter().map(|array| Some(array.len() as u64)).collect(); |
320 | | - Ok(Arc::new(result) as ArrayRef) |
321 | | - } |
322 | | - |
323 | | - fn unpack_uint64_array(col: Result<ColumnarValue>) -> Result<Vec<u64>> { |
324 | | - if let ColumnarValue::Array(array) = col? { |
325 | | - Ok(as_uint64_array(&array)?.values().to_vec()) |
326 | | - } else { |
327 | | - internal_err!("Unexpected scalar created by a test function") |
328 | | - } |
329 | | - } |
330 | | - |
331 | | - #[test] |
332 | | - fn test_make_scalar_function() -> Result<()> { |
333 | | - let adapter_func = make_scalar_function_inner(dummy_function); |
334 | | - |
335 | | - let scalar_arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(1))); |
336 | | - let array_arg = ColumnarValue::Array( |
337 | | - ScalarValue::Int64(Some(1)) |
338 | | - .to_array_of_size(5) |
339 | | - .expect("Failed to convert to array of size"), |
340 | | - ); |
341 | | - let result = unpack_uint64_array(adapter_func(&[array_arg, scalar_arg]))?; |
342 | | - assert_eq!(result, vec![5, 5]); |
343 | | - |
344 | | - Ok(()) |
345 | | - } |
346 | | - |
347 | | - #[test] |
348 | | - fn test_make_scalar_function_with_no_hints() -> Result<()> { |
349 | | - let adapter_func = make_scalar_function_with_hints(dummy_function, vec![]); |
350 | | - |
351 | | - let scalar_arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(1))); |
352 | | - let array_arg = ColumnarValue::Array( |
353 | | - ScalarValue::Int64(Some(1)) |
354 | | - .to_array_of_size(5) |
355 | | - .expect("Failed to convert to array of size"), |
356 | | - ); |
357 | | - let result = unpack_uint64_array(adapter_func(&[array_arg, scalar_arg]))?; |
358 | | - assert_eq!(result, vec![5, 5]); |
359 | | - |
360 | | - Ok(()) |
361 | | - } |
362 | | - |
363 | | - #[test] |
364 | | - fn test_make_scalar_function_with_hints() -> Result<()> { |
365 | | - let adapter_func = make_scalar_function_with_hints( |
366 | | - dummy_function, |
367 | | - vec![Hint::Pad, Hint::AcceptsSingular], |
368 | | - ); |
369 | | - |
370 | | - let scalar_arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(1))); |
371 | | - let array_arg = ColumnarValue::Array( |
372 | | - ScalarValue::Int64(Some(1)) |
373 | | - .to_array_of_size(5) |
374 | | - .expect("Failed to convert to array of size"), |
375 | | - ); |
376 | | - let result = unpack_uint64_array(adapter_func(&[array_arg, scalar_arg]))?; |
377 | | - assert_eq!(result, vec![5, 1]); |
378 | | - |
379 | | - Ok(()) |
380 | | - } |
381 | | - |
382 | | - #[test] |
383 | | - fn test_make_scalar_function_with_hints_on_arrays() -> Result<()> { |
384 | | - let array_arg = ColumnarValue::Array( |
385 | | - ScalarValue::Int64(Some(1)) |
386 | | - .to_array_of_size(5) |
387 | | - .expect("Failed to convert to array of size"), |
388 | | - ); |
389 | | - let adapter_func = make_scalar_function_with_hints( |
390 | | - dummy_function, |
391 | | - vec![Hint::Pad, Hint::AcceptsSingular], |
392 | | - ); |
393 | | - |
394 | | - let result = unpack_uint64_array(adapter_func(&[array_arg.clone(), array_arg]))?; |
395 | | - assert_eq!(result, vec![5, 5]); |
396 | | - |
397 | | - Ok(()) |
398 | | - } |
399 | | - |
400 | | - #[test] |
401 | | - fn test_make_scalar_function_with_mixed_hints() -> Result<()> { |
402 | | - let adapter_func = make_scalar_function_with_hints( |
403 | | - dummy_function, |
404 | | - vec![Hint::Pad, Hint::AcceptsSingular, Hint::Pad], |
405 | | - ); |
406 | | - |
407 | | - let scalar_arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(1))); |
408 | | - let array_arg = ColumnarValue::Array( |
409 | | - ScalarValue::Int64(Some(1)) |
410 | | - .to_array_of_size(5) |
411 | | - .expect("Failed to convert to array of size"), |
412 | | - ); |
413 | | - let result = unpack_uint64_array(adapter_func(&[ |
414 | | - array_arg, |
415 | | - scalar_arg.clone(), |
416 | | - scalar_arg, |
417 | | - ]))?; |
418 | | - assert_eq!(result, vec![5, 1, 5]); |
419 | | - |
420 | | - Ok(()) |
421 | | - } |
422 | | - |
423 | | - #[test] |
424 | | - fn test_make_scalar_function_with_more_arguments_than_hints() -> Result<()> { |
425 | | - let adapter_func = make_scalar_function_with_hints( |
426 | | - dummy_function, |
427 | | - vec![Hint::Pad, Hint::AcceptsSingular, Hint::Pad], |
428 | | - ); |
429 | | - |
430 | | - let scalar_arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(1))); |
431 | | - let array_arg = ColumnarValue::Array( |
432 | | - ScalarValue::Int64(Some(1)) |
433 | | - .to_array_of_size(5) |
434 | | - .expect("Failed to convert to array of size"), |
435 | | - ); |
436 | | - let result = unpack_uint64_array(adapter_func(&[ |
437 | | - array_arg.clone(), |
438 | | - scalar_arg.clone(), |
439 | | - scalar_arg, |
440 | | - array_arg, |
441 | | - ]))?; |
442 | | - assert_eq!(result, vec![5, 1, 5, 5]); |
443 | | - |
444 | | - Ok(()) |
445 | | - } |
446 | | - |
447 | | - #[test] |
448 | | - fn test_make_scalar_function_with_hints_than_arguments() -> Result<()> { |
449 | | - let adapter_func = make_scalar_function_with_hints( |
450 | | - dummy_function, |
451 | | - vec![ |
452 | | - Hint::Pad, |
453 | | - Hint::AcceptsSingular, |
454 | | - Hint::Pad, |
455 | | - Hint::Pad, |
456 | | - Hint::AcceptsSingular, |
457 | | - Hint::Pad, |
458 | | - ], |
459 | | - ); |
460 | | - |
461 | | - let scalar_arg = ColumnarValue::Scalar(ScalarValue::Int64(Some(1))); |
462 | | - let array_arg = ColumnarValue::Array( |
463 | | - ScalarValue::Int64(Some(1)) |
464 | | - .to_array_of_size(5) |
465 | | - .expect("Failed to convert to array of size"), |
466 | | - ); |
467 | | - let result = unpack_uint64_array(adapter_func(&[array_arg, scalar_arg]))?; |
468 | | - assert_eq!(result, vec![5, 1]); |
469 | | - |
470 | | - Ok(()) |
471 | | - } |
472 | | -} |
0 commit comments