From dbd519de64c3d41a9176fbed4c6767f1e746c294 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Fri, 13 Jun 2025 00:14:41 -0400 Subject: [PATCH 1/7] Remove `null_equals_null` --- datafusion/physical-plan/src/joins/nested_loop_join.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index c9b5b9e43b6f2..9cbfa9aa0cf61 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -718,8 +718,6 @@ struct NestedLoopJoinStream { inner_table: OnceFut, /// Information of index and left / right placement of columns column_indices: Vec, - // TODO: support null aware equal - // null_equals_null: bool /// Join execution metrics join_metrics: BuildProbeJoinMetrics, /// Cache for join indices calculations From 686e0a73da39ca0ba8347b1c7b7ba11aac7728cd Mon Sep 17 00:00:00 2001 From: Jonathan Date: Fri, 13 Jun 2025 03:51:14 -0400 Subject: [PATCH 2/7] fix: Fixed error handling for `generate_series`/`range` --- .../functions-table/src/generate_series.rs | 16 +++++++++---- .../test_files/table_functions.slt | 23 +++++++++++-------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index ffb93cf59b16e..48d46ecf78820 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -197,11 +197,17 @@ impl TableFunctionImpl for GenerateSeriesFuncImpl { } let mut normalize_args = Vec::new(); - for expr in exprs { + for (expr_indice, expr) in exprs.iter().enumerate() { match expr { Expr::Literal(ScalarValue::Null, _) => {} Expr::Literal(ScalarValue::Int64(Some(n)), _) => normalize_args.push(*n), - _ => return plan_err!("First argument must be an integer literal"), + other => { + return plan_err!( + "Argument #{} must be an integer literal, got {:?}", + expr_indice + 1, + other + ) + } }; } @@ -232,15 +238,15 @@ impl TableFunctionImpl for GenerateSeriesFuncImpl { }; if start > end && step > 0 { - return plan_err!("start is bigger than end, but increment is positive: cannot generate infinite series"); + return plan_err!("Start is bigger than end, but increment is positive: Cannot generate infinite series"); } if start < end && step < 0 { - return plan_err!("start is smaller than end, but increment is negative: cannot generate infinite series"); + return plan_err!("Start is smaller than end, but increment is negative: Cannot generate infinite series"); } if step == 0 { - return plan_err!("step cannot be zero"); + return plan_err!("Step cannot be zero"); } Ok(Arc::new(GenerateSeriesTable { diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index 7d318c50bacf4..bce88138cdc64 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -160,16 +160,16 @@ physical_plan LazyMemoryExec: partitions=1, batch_generators=[generate_series: s # Test generate_series with invalid arguments # -query error DataFusion error: Error during planning: start is bigger than end, but increment is positive: cannot generate infinite series +query error DataFusion error: Error during planning: Start is bigger than end, but increment is positive: Cannot generate infinite series SELECT * FROM generate_series(5, 1) -query error DataFusion error: Error during planning: start is smaller than end, but increment is negative: cannot generate infinite series +query error DataFusion error: Error during planning: Start is smaller than end, but increment is negative: Cannot generate infinite series SELECT * FROM generate_series(-6, 6, -1) -query error DataFusion error: Error during planning: step cannot be zero +query error DataFusion error: Error during planning: Step cannot be zero SELECT * FROM generate_series(-6, 6, 0) -query error DataFusion error: Error during planning: start is bigger than end, but increment is positive: cannot generate infinite series +query error DataFusion error: Error during planning: Start is bigger than end, but increment is positive: Cannot generate infinite series SELECT * FROM generate_series(6, -6, 1) @@ -177,7 +177,7 @@ statement error DataFusion error: Error during planning: generate_series functio SELECT * FROM generate_series(1, 2, 3, 4) -statement error DataFusion error: Error during planning: First argument must be an integer literal +statement error DataFusion error: Error during planning: Argument #1 must be an integer literal, got Literal\(Utf8\("foo"\), None\) SELECT * FROM generate_series('foo', 'bar') # UDF and UDTF `generate_series` can be used simultaneously @@ -277,16 +277,16 @@ physical_plan LazyMemoryExec: partitions=1, batch_generators=[range: start=1, en # Test range with invalid arguments # -query error DataFusion error: Error during planning: start is bigger than end, but increment is positive: cannot generate infinite series +query error DataFusion error: Error during planning: Start is bigger than end, but increment is positive: Cannot generate infinite series SELECT * FROM range(5, 1) -query error DataFusion error: Error during planning: start is smaller than end, but increment is negative: cannot generate infinite series +query error DataFusion error: Error during planning: Start is smaller than end, but increment is negative: Cannot generate infinite series SELECT * FROM range(-6, 6, -1) -query error DataFusion error: Error during planning: step cannot be zero +query error DataFusion error: Error during planning: Step cannot be zero SELECT * FROM range(-6, 6, 0) -query error DataFusion error: Error during planning: start is bigger than end, but increment is positive: cannot generate infinite series +query error DataFusion error: Error during planning: Start is bigger than end, but increment is positive: Cannot generate infinite series SELECT * FROM range(6, -6, 1) @@ -294,9 +294,12 @@ statement error DataFusion error: Error during planning: range function requires SELECT * FROM range(1, 2, 3, 4) -statement error DataFusion error: Error during planning: First argument must be an integer literal +statement error DataFusion error: Error during planning: Argument #1 must be an integer literal, got Literal\(Utf8\("foo"\), None\) SELECT * FROM range('foo', 'bar') +statement error DataFusion error: Error during planning: Argument #2 must be an integer literal, got Literal\(Utf8\("bar"\), None\) +SELECT * FROM range(1, 'bar') + # UDF and UDTF `range` can be used simultaneously query ? rowsort SELECT range(1, t1.end) FROM range(3, 5) as t1(end) From c1621a52468403d7305f8c5d3f69bb9bfbc0866f Mon Sep 17 00:00:00 2001 From: Jonathan Date: Fri, 13 Jun 2025 03:56:38 -0400 Subject: [PATCH 3/7] add null --- datafusion/functions-table/src/generate_series.rs | 2 +- datafusion/sqllogictest/test_files/table_functions.slt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index 48d46ecf78820..cae9a0353cffe 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -203,7 +203,7 @@ impl TableFunctionImpl for GenerateSeriesFuncImpl { Expr::Literal(ScalarValue::Int64(Some(n)), _) => normalize_args.push(*n), other => { return plan_err!( - "Argument #{} must be an integer literal, got {:?}", + "Argument #{} must be an integer literal or null value, got {:?}", expr_indice + 1, other ) diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index bce88138cdc64..97ecd4dada235 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -177,7 +177,7 @@ statement error DataFusion error: Error during planning: generate_series functio SELECT * FROM generate_series(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an integer literal, got Literal\(Utf8\("foo"\), None\) +statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Literal\(Utf8\("foo"\), None\) SELECT * FROM generate_series('foo', 'bar') # UDF and UDTF `generate_series` can be used simultaneously @@ -294,10 +294,10 @@ statement error DataFusion error: Error during planning: range function requires SELECT * FROM range(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an integer literal, got Literal\(Utf8\("foo"\), None\) +statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Literal\(Utf8\("foo"\), None\) SELECT * FROM range('foo', 'bar') -statement error DataFusion error: Error during planning: Argument #2 must be an integer literal, got Literal\(Utf8\("bar"\), None\) +statement error DataFusion error: Error during planning: Argument #2 must be an integer literal or null value, got Literal\(Utf8\("bar"\), None\) SELECT * FROM range(1, 'bar') # UDF and UDTF `range` can be used simultaneously From bd15a17f55eca7cecfd5ff878875b19af31616b5 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Fri, 13 Jun 2025 17:27:45 -0400 Subject: [PATCH 4/7] fixes --- datafusion/functions-table/src/generate_series.rs | 7 ++++--- datafusion/sqllogictest/test_files/table_functions.slt | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index cae9a0353cffe..00290fb456b21 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -197,14 +197,15 @@ impl TableFunctionImpl for GenerateSeriesFuncImpl { } let mut normalize_args = Vec::new(); - for (expr_indice, expr) in exprs.iter().enumerate() { + for (expr_index, expr) in exprs.iter().enumerate() { match expr { Expr::Literal(ScalarValue::Null, _) => {} Expr::Literal(ScalarValue::Int64(Some(n)), _) => normalize_args.push(*n), other => { return plan_err!( - "Argument #{} must be an integer literal or null value, got {:?}", - expr_indice + 1, + "Argument #{} must be an integer literal or null value, got {} ({:?})", + expr_index + 1, + other, other ) } diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index 97ecd4dada235..d000db238476d 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -177,7 +177,7 @@ statement error DataFusion error: Error during planning: generate_series functio SELECT * FROM generate_series(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Literal\(Utf8\("foo"\), None\) +statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) SELECT * FROM generate_series('foo', 'bar') # UDF and UDTF `generate_series` can be used simultaneously @@ -294,10 +294,10 @@ statement error DataFusion error: Error during planning: range function requires SELECT * FROM range(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Literal\(Utf8\("foo"\), None\) +statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) SELECT * FROM range('foo', 'bar') -statement error DataFusion error: Error during planning: Argument #2 must be an integer literal or null value, got Literal\(Utf8\("bar"\), None\) +statement error DataFusion error: Error during planning: Argument #2 must be an integer literal or null value, got Utf8\("bar"\) \(Literal\(Utf8\("bar"\), None\)\) SELECT * FROM range(1, 'bar') # UDF and UDTF `range` can be used simultaneously From 31c584e922a08a3e820cf3b4f0a02a9d6b5920da Mon Sep 17 00:00:00 2001 From: Jonathan Date: Sun, 15 Jun 2025 01:33:19 -0400 Subject: [PATCH 5/7] tweaks --- datafusion/functions-table/src/generate_series.rs | 2 +- datafusion/physical-plan/src/joins/nested_loop_join.rs | 2 ++ datafusion/sqllogictest/test_files/table_functions.slt | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index 00290fb456b21..e26b89964a029 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -203,7 +203,7 @@ impl TableFunctionImpl for GenerateSeriesFuncImpl { Expr::Literal(ScalarValue::Int64(Some(n)), _) => normalize_args.push(*n), other => { return plan_err!( - "Argument #{} must be an integer literal or null value, got {} ({:?})", + "Argument #{} must be an INTEGER or NULL, got {} ({:?})", expr_index + 1, other, other diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 9cbfa9aa0cf61..c9b5b9e43b6f2 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -718,6 +718,8 @@ struct NestedLoopJoinStream { inner_table: OnceFut, /// Information of index and left / right placement of columns column_indices: Vec, + // TODO: support null aware equal + // null_equals_null: bool /// Join execution metrics join_metrics: BuildProbeJoinMetrics, /// Cache for join indices calculations diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index d000db238476d..e9dcade6e6dc0 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -177,7 +177,7 @@ statement error DataFusion error: Error during planning: generate_series functio SELECT * FROM generate_series(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) +statement error DataFusion error: Error during planning: Argument #1 must be an INTEGER or NULL, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) SELECT * FROM generate_series('foo', 'bar') # UDF and UDTF `generate_series` can be used simultaneously @@ -294,10 +294,10 @@ statement error DataFusion error: Error during planning: range function requires SELECT * FROM range(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an integer literal or null value, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) +statement error DataFusion error: Error during planning: Argument #1 must be an INTEGER or NULL, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) SELECT * FROM range('foo', 'bar') -statement error DataFusion error: Error during planning: Argument #2 must be an integer literal or null value, got Utf8\("bar"\) \(Literal\(Utf8\("bar"\), None\)\) +statement error DataFusion error: Error during planning: Argument #2 must be an INTEGER or NULL, got Utf8\("bar"\) \(Literal\(Utf8\("bar"\), None\)\) SELECT * FROM range(1, 'bar') # UDF and UDTF `range` can be used simultaneously From 2f29fdf426e110134bcda72123e7405aa33481c8 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Sun, 15 Jun 2025 01:41:42 -0400 Subject: [PATCH 6/7] fix --- datafusion/functions-table/src/generate_series.rs | 3 +-- datafusion/sqllogictest/test_files/table_functions.slt | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-table/src/generate_series.rs b/datafusion/functions-table/src/generate_series.rs index e26b89964a029..c875874c569d0 100644 --- a/datafusion/functions-table/src/generate_series.rs +++ b/datafusion/functions-table/src/generate_series.rs @@ -203,9 +203,8 @@ impl TableFunctionImpl for GenerateSeriesFuncImpl { Expr::Literal(ScalarValue::Int64(Some(n)), _) => normalize_args.push(*n), other => { return plan_err!( - "Argument #{} must be an INTEGER or NULL, got {} ({:?})", + "Argument #{} must be an INTEGER or NULL, got {:?}", expr_index + 1, - other, other ) } diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index e9dcade6e6dc0..0d2c61ce2025d 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -177,7 +177,7 @@ statement error DataFusion error: Error during planning: generate_series functio SELECT * FROM generate_series(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an INTEGER or NULL, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) +statement error DataFusion error: Error during planning: Argument #1 must be an INTEGER or NULL, got Literal\(Utf8\("foo"\), None\) SELECT * FROM generate_series('foo', 'bar') # UDF and UDTF `generate_series` can be used simultaneously @@ -294,10 +294,10 @@ statement error DataFusion error: Error during planning: range function requires SELECT * FROM range(1, 2, 3, 4) -statement error DataFusion error: Error during planning: Argument #1 must be an INTEGER or NULL, got Utf8\("foo"\) \(Literal\(Utf8\("foo"\), None\)\) +statement error DataFusion error: Error during planning: Argument #1 must be an INTEGER or NULL, got Literal\(Utf8\("foo"\), None\) SELECT * FROM range('foo', 'bar') -statement error DataFusion error: Error during planning: Argument #2 must be an INTEGER or NULL, got Utf8\("bar"\) \(Literal\(Utf8\("bar"\), None\)\) +statement error DataFusion error: Error during planning: Argument #2 must be an INTEGER or NULL, got Literal\(Utf8\("foo"\), None\) SELECT * FROM range(1, 'bar') # UDF and UDTF `range` can be used simultaneously From 6df1001c9e073e2c3eb4d4a3f65d1387da49bb7d Mon Sep 17 00:00:00 2001 From: Jonathan Date: Sun, 15 Jun 2025 02:46:59 -0400 Subject: [PATCH 7/7] fix --- datafusion/sqllogictest/test_files/table_functions.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index 0d2c61ce2025d..6852f4d7cc1e8 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -297,7 +297,7 @@ SELECT * FROM range(1, 2, 3, 4) statement error DataFusion error: Error during planning: Argument #1 must be an INTEGER or NULL, got Literal\(Utf8\("foo"\), None\) SELECT * FROM range('foo', 'bar') -statement error DataFusion error: Error during planning: Argument #2 must be an INTEGER or NULL, got Literal\(Utf8\("foo"\), None\) +statement error DataFusion error: Error during planning: Argument #2 must be an INTEGER or NULL, got Literal\(Utf8\("bar"\), None\) SELECT * FROM range(1, 'bar') # UDF and UDTF `range` can be used simultaneously