From cb571d9e2e7da39da2aec5ca50283b9d1d8ffe46 Mon Sep 17 00:00:00 2001
From: Blake Orth <borth@seerai.space>
Date: Mon, 18 Aug 2025 15:25:03 -0600
Subject: [PATCH 1/6] Fix: ListingTableFactory hive column detection  - Fixes
 an issue in the ListingTableFactory where hive columns are not    detected
 and incorporated into the table schema when an explicit    schema has not
 been set by the user  - Fixes an issue where subdirectories that do not
 follow Hive    formatting (e.g. key=value) could be erroneously interpreted
 as    contributing to the table schema

---
 .../core/src/datasource/listing/table.rs      |  3 +
 .../src/datasource/listing_table_factory.rs   | 73 ++++++++++++++++---
 .../test_files/insert_to_external.slt         | 28 +++++++
 3 files changed, 93 insertions(+), 11 deletions(-)

diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index d289a1d071296..c51735f73aa2b 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -802,6 +802,9 @@ impl ListingOptions {
                     .rev()
                     .skip(1) // get parents only; skip the file itself
                     .rev()
+                    // Partitions are expected to follow the format "column_name=value", so we
+                    // should ignore any path part that cannot be parsed into the expected format
+                    .filter(|s| s.contains('='))
                     .map(|s| s.split('=').take(1).collect())
                     .collect_vec()
             })
diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 80dcdc1f34626..137cd08ee084a 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -64,15 +64,25 @@ impl TableProviderFactory for ListingTableFactory {
             .create(session_state, &cmd.options)?;
 
         let file_extension = get_extension(cmd.location.as_str());
+        let mut table_path = ListingTableUrl::parse(&cmd.location)?;
+        let mut options = ListingOptions::new(file_format)
+            .with_session_config_options(session_state.config())
+            .with_file_extension(file_extension);
 
         let (provided_schema, table_partition_cols) = if cmd.schema.fields().is_empty() {
+            let part_cols = match cmd.table_partition_cols.is_empty() {
+                true => options
+                    .infer_partitions(session_state, &table_path)
+                    .await?
+                    .into_iter(),
+                false => cmd.table_partition_cols.clone().into_iter(),
+            };
             (
                 None,
-                cmd.table_partition_cols
-                    .iter()
-                    .map(|x| {
+                part_cols
+                    .map(|p| {
                         (
-                            x.clone(),
+                            p,
                             DataType::Dictionary(
                                 Box::new(DataType::UInt16),
                                 Box::new(DataType::Utf8),
@@ -108,13 +118,7 @@ impl TableProviderFactory for ListingTableFactory {
             (Some(schema), table_partition_cols)
         };
 
-        let mut table_path = ListingTableUrl::parse(&cmd.location)?;
-
-        let options = ListingOptions::new(file_format)
-            .with_file_extension(&file_extension)
-            .with_session_config_options(session_state.config())
-            .with_table_partition_cols(table_partition_cols);
-
+        options = options.with_table_partition_cols(table_partition_cols);
         options
             .validate_partitions(session_state, &table_path)
             .await?;
@@ -189,6 +193,8 @@ fn get_extension(path: &str) -> String {
 mod tests {
     use glob::Pattern;
     use std::collections::HashMap;
+    use std::fs;
+    use std::path::PathBuf;
 
     use super::*;
     use crate::{
@@ -375,4 +381,49 @@ mod tests {
             Pattern::new("*.csv").unwrap()
         );
     }
+
+    #[tokio::test]
+    async fn test_create_with_hive_partitions() {
+        let dir = tempfile::tempdir().unwrap();
+        let mut path = PathBuf::from(dir.path());
+        path.extend(["key1=value1", "key2=value2"]);
+        fs::create_dir_all(&path).unwrap();
+        path.push("data.parquet");
+        fs::File::create_new(&path).unwrap();
+
+        let factory = ListingTableFactory::new();
+        let context = SessionContext::new();
+        let state = context.state();
+        let name = TableReference::bare("foo");
+
+        let cmd = CreateExternalTable {
+            name,
+            location: dir.path().to_str().unwrap().to_string(),
+            file_type: "parquet".to_string(),
+            schema: Arc::new(DFSchema::empty()),
+            table_partition_cols: vec![],
+            if_not_exists: false,
+            temporary: false,
+            definition: None,
+            order_exprs: vec![],
+            unbounded: false,
+            options: HashMap::new(),
+            constraints: Constraints::default(),
+            column_defaults: HashMap::new(),
+        };
+        let table_provider = factory.create(&state, &cmd).await.unwrap();
+        let listing_table = table_provider
+            .as_any()
+            .downcast_ref::<ListingTable>()
+            .unwrap();
+
+        let listing_options = listing_table.options();
+        let dtype =
+            DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8));
+        let expected_cols = vec![
+            (String::from("key1"), dtype.clone()),
+            (String::from("key2"), dtype.clone()),
+        ];
+        assert_eq!(expected_cols, listing_options.table_partition_cols);
+    }
 }
diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt
index b6e35f4081398..1c084b6f84b10 100644
--- a/datafusion/sqllogictest/test_files/insert_to_external.slt
+++ b/datafusion/sqllogictest/test_files/insert_to_external.slt
@@ -175,6 +175,34 @@ select * from partitioned_insert_test order by a,b,c
 1 20 200
 2 20 200
 
+statement count 0
+CREATE EXTERNAL TABLE
+partitioned_insert_test_readback
+STORED AS csv
+LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned/';
+
+query TTT
+describe partitioned_insert_test_readback;
+----
+c Int64 YES
+a Dictionary(UInt16, Utf8) NO
+b Dictionary(UInt16, Utf8) NO
+
+query ITT
+select * from partitioned_insert_test_readback order by a,b,c;
+----
+1 10 100
+1 10 200
+1 20 100
+2 20 100
+1 20 200
+2 20 200
+
+query I
+select count(*) from partitioned_insert_test_readback where b=100;
+----
+3
+
 statement ok
 CREATE EXTERNAL TABLE
 partitioned_insert_test_verify(c bigint)

From b98cb6eedf0031f52b36cc9629ab30de64d8e9f2 Mon Sep 17 00:00:00 2001
From: Blake Orth <borth@seerai.space>
Date: Tue, 26 Aug 2025 17:03:36 -0600
Subject: [PATCH 2/6] Adds configuration, tests, and docs  - Adds a
 configuration option to enable or disable hive partition    schema inference 
 - Adds configuration option documentation and unit tests  - Adds additional
 sqllogic tests specifically targeting partitioned   listing tables  - Adds
 user guide docs for migration and external table behavior for    both the CLI
 and DDL guides

---
 datafusion/common/src/config.rs               |  5 ++
 .../src/datasource/listing_table_factory.rs   | 49 +++++++++++-
 .../test_files/listing_table_partitions.slt   | 75 +++++++++++++++++++
 docs/source/library-user-guide/upgrading.md   | 36 +++++++++
 docs/source/user-guide/cli/datasources.md     | 24 ++++++
 docs/source/user-guide/configs.md             |  1 +
 docs/source/user-guide/sql/ddl.md             | 29 +++++++
 7 files changed, 215 insertions(+), 4 deletions(-)
 create mode 100644 datafusion/sqllogictest/test_files/listing_table_partitions.slt

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index bec471bfe4545..07c6e19ce4f1b 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -455,6 +455,11 @@ config_namespace! {
         /// tables (e.g. `/table/year=2021/month=01/data.parquet`).
         pub listing_table_ignore_subdirectory: bool, default = true
 
+        /// Should a `ListingTable` created through the `ListingTableFactory` infer table
+        /// partitions from Hive compliant directories. Defaults to true (partition columns are
+        /// inferred and will be represented in the table schema).
+        pub listing_table_factory_infer_partitions: bool, default = true
+
         /// Should DataFusion support recursive CTEs
         pub enable_recursive_ctes: bool, default = true
 
diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 137cd08ee084a..ee6daf6b8897d 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -70,13 +70,19 @@ impl TableProviderFactory for ListingTableFactory {
             .with_file_extension(file_extension);
 
         let (provided_schema, table_partition_cols) = if cmd.schema.fields().is_empty() {
-            let part_cols = match cmd.table_partition_cols.is_empty() {
-                true => options
+            let infer_parts = session_state
+                .config_options()
+                .execution
+                .listing_table_factory_infer_partitions;
+            let part_cols = if cmd.table_partition_cols.is_empty() && infer_parts {
+                options
                     .infer_partitions(session_state, &table_path)
                     .await?
-                    .into_iter(),
-                false => cmd.table_partition_cols.clone().into_iter(),
+                    .into_iter()
+            } else {
+                cmd.table_partition_cols.clone().into_iter()
             };
+
             (
                 None,
                 part_cols
@@ -191,6 +197,7 @@ fn get_extension(path: &str) -> String {
 
 #[cfg(test)]
 mod tests {
+    use datafusion_execution::config::SessionConfig;
     use glob::Pattern;
     use std::collections::HashMap;
     use std::fs;
@@ -425,5 +432,39 @@ mod tests {
             (String::from("key2"), dtype.clone()),
         ];
         assert_eq!(expected_cols, listing_options.table_partition_cols);
+
+        // Ensure partition detection can be disabled via config
+        let factory = ListingTableFactory::new();
+        let mut cfg = SessionConfig::new();
+        cfg.options_mut()
+            .execution
+            .listing_table_factory_infer_partitions = false;
+        let context = SessionContext::new_with_config(cfg);
+        let state = context.state();
+        let name = TableReference::bare("foo");
+
+        let cmd = CreateExternalTable {
+            name,
+            location: dir.path().to_str().unwrap().to_string(),
+            file_type: "parquet".to_string(),
+            schema: Arc::new(DFSchema::empty()),
+            table_partition_cols: vec![],
+            if_not_exists: false,
+            temporary: false,
+            definition: None,
+            order_exprs: vec![],
+            unbounded: false,
+            options: HashMap::new(),
+            constraints: Constraints::default(),
+            column_defaults: HashMap::new(),
+        };
+        let table_provider = factory.create(&state, &cmd).await.unwrap();
+        let listing_table = table_provider
+            .as_any()
+            .downcast_ref::<ListingTable>()
+            .unwrap();
+
+        let listing_options = listing_table.options();
+        assert!(listing_options.table_partition_cols.is_empty());
     }
 }
diff --git a/datafusion/sqllogictest/test_files/listing_table_partitions.slt b/datafusion/sqllogictest/test_files/listing_table_partitions.slt
new file mode 100644
index 0000000000000..52433429cfe80
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/listing_table_partitions.slt
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+query I
+copy (values('foo'), ('bar'))
+to 'test_files/scratch/listing_table_partitions/single_part/a=1/file1.parquet';
+----
+2
+
+query I
+copy (values('baz'))
+to 'test_files/scratch/listing_table_partitions/single_part/a=1/file2.parquet';
+----
+1
+
+statement count 0
+create external table single_part
+stored as parquet location 'test_files/scratch/listing_table_partitions/single_part/';
+
+query TT
+select * from single_part order by (column1);
+----
+bar 1
+baz 1
+foo 1
+
+query I
+copy (values('foo'), ('bar')) to 'test_files/scratch/listing_table_partitions/multi_part/a=1/b=100/file1.parquet';
+----
+2
+
+query I
+copy (values('baz')) to 'test_files/scratch/listing_table_partitions/multi_part/a=1/b=200/file1.parquet';
+----
+1
+
+statement count 0
+create external table multi_part
+stored as parquet location 'test_files/scratch/listing_table_partitions/multi_part/';
+
+query TTT
+select * from multi_part where b=200; 
+----
+baz 1 200
+
+statement count 0
+set datafusion.execution.listing_table_factory_infer_partitions = false;
+
+statement count 0
+create external table infer_disabled
+stored as parquet location 'test_files/scratch/listing_table_partitions/multi_part/';
+
+query T
+select * from infer_disabled order by (column1);
+----
+bar
+baz
+foo
+
+statement count 0
+set datafusion.execution.listing_table_factory_infer_partitions = true;
diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md
index 05c0de0118680..38077ef4327a8 100644
--- a/docs/source/library-user-guide/upgrading.md
+++ b/docs/source/library-user-guide/upgrading.md
@@ -24,6 +24,42 @@
 **Note:** DataFusion `50.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version.
 You can see the current [status of the `50.0.0 `release here](https://github.com/apache/datafusion/issues/16799)
 
+### New `datafusion.execution.listing_table_factory_infer_partitions` configuration option
+
+DataFusion 50.0.0 adds support for automatic Hive partition inference when using the `ListingTableFactory`. Previously,
+when creating a `ListingTable` through the `ListingTableFactory`, datasets that use Hive partitioning (e.g.
+`/table_root/column1=value1/column2=value2/data.parquet`) would not have the Hive columns or their values reflected in
+the table's schema or data. DataFusion will now, by default, detect Hive compliant partitions and incorporate them into
+the table's schema and data, allowing users to query and filter on the Hive columns. The previous behavior can be
+restored by setting the configuration option to `false`. Note that this does not affect low-level `ListingTable` users.
+
+**Configuration:**
+
+- **Key**: `datafusion.execution.listing_table_factory_infer_partitions`
+- **Default**: `true`
+- **Valid values**: `true`, `false`
+
+**Usage:**
+
+```rust
+# /* comment to avoid running
+use datafusion::prelude::*;
+use datafusion_common::config::SpillCompression;
+
+let mut config = SessionConfig::new();
+config.options_mut().execution.listing_table_factory_infer_partitions = false;
+let ctx = SessionContext::new_with_config(config);
+# */
+```
+
+Or via SQL:
+
+```sql
+SET datafusion.execution.listing_table_factory_infer_partitions = 'false';
+```
+
+[issue #17049]: https://github.com/apache/datafusion/issues/17049
+
 ### `ScalarUDFImpl`, `AggregateUDFImpl` and `WindowUDFImpl` traits now require `PartialEq`, `Eq`, and `Hash` traits
 
 To address error-proneness of `ScalarUDFImpl::equals`, `AggregateUDFImpl::equals`and
diff --git a/docs/source/user-guide/cli/datasources.md b/docs/source/user-guide/cli/datasources.md
index c15b8a5e46c99..6b1a4887a8a0f 100644
--- a/docs/source/user-guide/cli/datasources.md
+++ b/docs/source/user-guide/cli/datasources.md
@@ -162,6 +162,30 @@ STORED AS PARQUET
 LOCATION 'gs://bucket/my_table/';
 ```
 
+When specifying a directory path that has a Hive compliant partition structure, by default, DataFusion CLI will
+automatically parse and incorporate the Hive columns and their values into the table's schema and data. Given the
+following remote object paths:
+
+```console
+gs://bucket/my_table/a=1/b=100/file1.parquet
+gs://bucket/my_table/a=2/b=200/file2.parquet
+```
+
+`my_table` can be queried and filtered on the Hive columns:
+
+```sql
+CREATE EXTERNAL TABLE my_table
+STORED AS PARQUET
+LOCATION 'gs://bucket/my_table/';
+
+SELECT count(*) FROM my_table WHERE b=200;
++----------+
+| count(*) |
++----------+
+| 1        |
++----------+
+```
+
 # Formats
 
 ## Parquet
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index f1dad3d360a09..d3b0a4d673946 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -118,6 +118,7 @@ The following configuration settings are available:
 | datafusion.execution.soft_max_rows_per_output_file                      | 50000000                  | Target number of rows in output files when writing multiple. This is a soft max, so it can be exceeded slightly. There also will be one file smaller than the limit if the total number of rows written is not roughly divisible by the soft max                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
 | datafusion.execution.max_buffered_batches_per_output_file               | 2                         | This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
 | datafusion.execution.listing_table_ignore_subdirectory                  | true                      | Should sub directories be ignored when scanning directories for data files. Defaults to true (ignores subdirectories), consistent with Hive. Note that this setting does not affect reading partitioned tables (e.g. `/table/year=2021/month=01/data.parquet`).                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| datafusion.execution.listing_table_factory_infer_partitions             | true                      | Should a `ListingTable` created through the `ListingTableFactory` infer table partitions from Hive compliant directories. Defaults to true (partition columns are inferred and will be represented in the table schema).                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | datafusion.execution.enable_recursive_ctes                              | true                      | Should DataFusion support recursive CTEs                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
 | datafusion.execution.split_file_groups_by_statistics                    | false                     | Attempt to eliminate sorts by packing & sorting files with non-overlapping statistics into the same file groups. Currently experimental                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | datafusion.execution.keep_partition_by_columns                          | false                     | Should DataFusion keep the columns used for partition_by in the output RecordBatches                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
diff --git a/docs/source/user-guide/sql/ddl.md b/docs/source/user-guide/sql/ddl.md
index 89294b7518a41..3101d16aa2399 100644
--- a/docs/source/user-guide/sql/ddl.md
+++ b/docs/source/user-guide/sql/ddl.md
@@ -169,6 +169,35 @@ LOCATION '/path/to/directory/of/files'
 OPTIONS ('has_header' 'true');
 ```
 
+Tables that are partitioned using a Hive compliant partitioning scheme will have their columns and values automatically
+detected and incorporated into the table's schema and data. Given the following example directory structure:
+
+```console
+hive_partitioned/
+├── a=1
+│   └── b=200
+│       └── file1.parquet
+└── a=2
+    └── b=100
+        └── file2.parquet
+```
+
+Users can specify the top level `hive_partitioned` directory as an `EXTERNAL TABLE` and leverage the Hive partitions
+to query and filter data.
+
+```sql
+CREATE EXTERNAL TABLE hive_partitioned
+STORED AS PARQUET
+LOCATION '/path/to/hive_partitioned/';
+
+SELECT count(*) FROM hive_partitioned WHERE b=100;
++------------------+
+| count(*)         |
++------------------+
+| 1                |
++------------------+
+```
+
 ### Example: Unbounded Data Sources
 
 We can create unbounded data sources using the `CREATE UNBOUNDED EXTERNAL TABLE` SQL statement.

From 15cb099dfca279e52cd53e63993417d2d94c02d8 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 6 Sep 2025 06:50:48 -0400
Subject: [PATCH 3/6] Fix merge problem

---
 .../src/datasource/listing_table_factory.rs   | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs
index 54d1be42b6fb0..218a1fedbb379 100644
--- a/datafusion/core/src/datasource/listing_table_factory.rs
+++ b/datafusion/core/src/datasource/listing_table_factory.rs
@@ -397,6 +397,43 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn test_odd_directory_names() {
+        let dir = tempfile::tempdir().unwrap();
+        let mut path = PathBuf::from(dir.path());
+        path.extend(["odd.v1", "odd.v2"]);
+        fs::create_dir_all(&path).unwrap();
+
+        let factory = ListingTableFactory::new();
+        let context = SessionContext::new();
+        let state = context.state();
+        let name = TableReference::bare("foo");
+
+        let cmd = CreateExternalTable {
+            name,
+            location: String::from(path.to_str().unwrap()),
+            file_type: "parquet".to_string(),
+            schema: Arc::new(DFSchema::empty()),
+            table_partition_cols: vec![],
+            if_not_exists: false,
+            temporary: false,
+            definition: None,
+            order_exprs: vec![],
+            unbounded: false,
+            options: HashMap::new(),
+            constraints: Constraints::default(),
+            column_defaults: HashMap::new(),
+        };
+        let table_provider = factory.create(&state, &cmd).await.unwrap();
+        let listing_table = table_provider
+            .as_any()
+            .downcast_ref::<ListingTable>()
+            .unwrap();
+
+        let listing_options = listing_table.options();
+        assert_eq!("", listing_options.file_extension);
+    }
+
     #[tokio::test]
     async fn test_create_with_hive_partitions() {
         let dir = tempfile::tempdir().unwrap();

From b1c575c9eb946b2ac6c19ddf3e6794b11cd20268 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 6 Sep 2025 07:13:00 -0400
Subject: [PATCH 4/6] Update slt test

---
 datafusion/sqllogictest/test_files/aggregate.slt          | 1 -
 datafusion/sqllogictest/test_files/information_schema.slt | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index 35b2a6c03b399..1be2549ace71b 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -7444,4 +7444,3 @@ NULL NULL
 
 statement ok
 drop table distinct_avg;
-
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index fb2c89020112d..361bc97a17d9c 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -220,6 +220,7 @@ datafusion.execution.collect_statistics true
 datafusion.execution.enable_recursive_ctes true
 datafusion.execution.enforce_batch_size_in_joins false
 datafusion.execution.keep_partition_by_columns false
+datafusion.execution.listing_table_factory_infer_partitions true
 datafusion.execution.listing_table_ignore_subdirectory true
 datafusion.execution.max_buffered_batches_per_output_file 2
 datafusion.execution.meta_fetch_concurrency 32
@@ -334,6 +335,7 @@ datafusion.execution.collect_statistics true Should DataFusion collect statistic
 datafusion.execution.enable_recursive_ctes true Should DataFusion support recursive CTEs
 datafusion.execution.enforce_batch_size_in_joins false Should DataFusion enforce batch size in joins or not. By default, DataFusion will not enforce batch size in joins. Enforcing batch size in joins can reduce memory usage when joining large tables with a highly-selective join filter, but is also slightly slower.
 datafusion.execution.keep_partition_by_columns false Should DataFusion keep the columns used for partition_by in the output RecordBatches
+datafusion.execution.listing_table_factory_infer_partitions true Should a `ListingTable` created through the `ListingTableFactory` infer table partitions from Hive compliant directories. Defaults to true (partition columns are inferred and will be represented in the table schema).
 datafusion.execution.listing_table_ignore_subdirectory true Should sub directories be ignored when scanning directories for data files. Defaults to true (ignores subdirectories), consistent with Hive. Note that this setting does not affect reading partitioned tables (e.g. `/table/year=2021/month=01/data.parquet`).
 datafusion.execution.max_buffered_batches_per_output_file 2 This is the maximum number of RecordBatches buffered for each output file being worked. Higher values can potentially give faster write performance at the cost of higher peak memory consumption
 datafusion.execution.meta_fetch_concurrency 32 Number of files to read in parallel when inferring schema and statistics

From 15b2468d5f371dbe4b7b4b7361b598d66b58576f Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 6 Sep 2025 07:17:34 -0400
Subject: [PATCH 5/6] Make upgrade guide more concise

---
 docs/source/library-user-guide/upgrading.md | 41 ++++-----------------
 1 file changed, 8 insertions(+), 33 deletions(-)

diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md
index 0200eb5970921..bb33af5f30c02 100644
--- a/docs/source/library-user-guide/upgrading.md
+++ b/docs/source/library-user-guide/upgrading.md
@@ -24,39 +24,14 @@
 **Note:** DataFusion `50.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version.
 You can see the current [status of the `50.0.0 `release here](https://github.com/apache/datafusion/issues/16799)
 
-### New `datafusion.execution.listing_table_factory_infer_partitions` configuration option
-
-DataFusion 50.0.0 adds support for automatic Hive partition inference when using the `ListingTableFactory`. Previously,
-when creating a `ListingTable` through the `ListingTableFactory`, datasets that use Hive partitioning (e.g.
-`/table_root/column1=value1/column2=value2/data.parquet`) would not have the Hive columns or their values reflected in
-the table's schema or data. DataFusion will now, by default, detect Hive compliant partitions and incorporate them into
-the table's schema and data, allowing users to query and filter on the Hive columns. The previous behavior can be
-restored by setting the configuration option to `false`. Note that this does not affect low-level `ListingTable` users.
-
-**Configuration:**
-
-- **Key**: `datafusion.execution.listing_table_factory_infer_partitions`
-- **Default**: `true`
-- **Valid values**: `true`, `false`
-
-**Usage:**
-
-```rust
-# /* comment to avoid running
-use datafusion::prelude::*;
-use datafusion_common::config::SpillCompression;
-
-let mut config = SessionConfig::new();
-config.options_mut().execution.listing_table_factory_infer_partitions = false;
-let ctx = SessionContext::new_with_config(config);
-# */
-```
-
-Or via SQL:
-
-```sql
-SET datafusion.execution.listing_table_factory_infer_partitions = 'false';
-```
+### ListingTable automatically detects Hive Partitoned tables
+
+DataFusion 50.0.0 automatically infers Hive partitions when using the `ListingTableFactory` and `CREATE EXTERNAL TABLE`. Previously,
+when creating a `ListingTable`, datasets that use Hive partitioning (e.g.
+`/table_root/column1=value1/column2=value2/data.parquet`) would not have the Hive columns reflected in
+the table's schema or data. The previous behavior can be
+restored by setting the `datafusion.execution.listing_table_factory_infer_partitions` configuration option to `false`.
+See [issue #17049] for more details.
 
 [issue #17049]: https://github.com/apache/datafusion/issues/17049
 

From 07a6e24d568c204c49e38547ac359a987c78ada9 Mon Sep 17 00:00:00 2001
From: Blake Orth <borth@seerai.space>
Date: Mon, 8 Sep 2025 16:56:18 -0600
Subject: [PATCH 6/6] Fixes spelling and doc table reference issues

---
 docs/source/library-user-guide/upgrading.md | 2 +-
 docs/source/user-guide/sql/ddl.md           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md
index bb33af5f30c02..f260119c7a580 100644
--- a/docs/source/library-user-guide/upgrading.md
+++ b/docs/source/library-user-guide/upgrading.md
@@ -24,7 +24,7 @@
 **Note:** DataFusion `50.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version.
 You can see the current [status of the `50.0.0 `release here](https://github.com/apache/datafusion/issues/16799)
 
-### ListingTable automatically detects Hive Partitoned tables
+### ListingTable automatically detects Hive Partitioned tables
 
 DataFusion 50.0.0 automatically infers Hive partitions when using the `ListingTableFactory` and `CREATE EXTERNAL TABLE`. Previously,
 when creating a `ListingTable`, datasets that use Hive partitioning (e.g.
diff --git a/docs/source/user-guide/sql/ddl.md b/docs/source/user-guide/sql/ddl.md
index 8a816078520b2..bd41f691bf90b 100644
--- a/docs/source/user-guide/sql/ddl.md
+++ b/docs/source/user-guide/sql/ddl.md
@@ -182,7 +182,7 @@ hive_partitioned/
         └── file2.parquet
 ```
 
-Users can specify the top level `my_table` directory as an `EXTERNAL TABLE` and leverage the Hive partitions to query
+Users can specify the top-level `hive_partitioned` directory as an `EXTERNAL TABLE` and leverage the Hive partitions to query
 and filter data.
 
 ```sql