From 587277fddae2c04c24fd9bbd8defc096ab6e58dc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 14 Feb 2025 09:35:09 -0500 Subject: [PATCH] Update features / status documentation page (#14645) * Update features / status documentation page * Fix doctest include --- datafusion/core/src/lib.rs | 12 ++--- docs/source/index.rst | 1 + .../{sql/sql_status.md => features.md} | 51 +++++++++++-------- docs/source/user-guide/sql/index.rst | 1 - 4 files changed, 38 insertions(+), 27 deletions(-) rename docs/source/user-guide/{sql/sql_status.md => features.md} (65%) diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 70b595442cea..3ea9e67be299 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -920,6 +920,12 @@ doc_comment::doctest!( user_guide_cli_usage ); +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/user-guide/features.md", + user_guide_features +); + #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/user-guide/sql/aggregate_functions.md", @@ -986,12 +992,6 @@ doc_comment::doctest!( user_guide_sql_special_functions ); -#[cfg(doctest)] -doc_comment::doctest!( - "../../../docs/source/user-guide/sql/sql_status.md", - user_guide_sql_status -); - #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/user-guide/sql/subqueries.md", diff --git a/docs/source/index.rst b/docs/source/index.rst index 03561be3893c..45c4ffafe7f2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -103,6 +103,7 @@ To get started, see user-guide/introduction user-guide/example-usage + user-guide/features user-guide/concepts-readings-events user-guide/crate-configuration user-guide/cli/index diff --git a/docs/source/user-guide/sql/sql_status.md b/docs/source/user-guide/features.md similarity index 65% rename from docs/source/user-guide/sql/sql_status.md rename to docs/source/user-guide/features.md index cb9bc0bb67b3..1f73ce7eac11 100644 --- a/docs/source/user-guide/sql/sql_status.md +++ 
b/docs/source/user-guide/features.md @@ -17,23 +17,28 @@ under the License. --> -# Status +# Features ## General - [x] SQL Parser - [x] SQL Query Planner +- [x] DataFrame API +- [x] Parallel query execution +- [x] Streaming Execution + +## Optimizations + - [x] Query Optimizer - [x] Constant folding - [x] Join Reordering - [x] Limit Pushdown - [x] Projection push down - [x] Predicate push down -- [x] Type coercion -- [x] Parallel query execution ## SQL Support +- [x] Type coercion - [x] Projection (`SELECT`) - [x] Filter (`WHERE`) - [x] Filter post-aggregate (`HAVING`) @@ -42,23 +47,23 @@ - [x] Aggregate (`GROUP BY`) - [x] cast /try_cast - [x] [`VALUES` lists](https://www.postgresql.org/docs/current/queries-values.html) -- [x] [String Functions](./scalar_functions.md#string-functions) -- [x] [Conditional Functions](./scalar_functions.md#conditional-functions) -- [x] [Time and Date Functions](./scalar_functions.md#time-and-date-functions) -- [x] [Math Functions](./scalar_functions.md#math-functions) -- [x] [Aggregate Functions](./aggregate_functions.md) (`SUM`, `MEDIAN`, and many more) +- [x] [String Functions](./sql/scalar_functions.md#string-functions) +- [x] [Conditional Functions](./sql/scalar_functions.md#conditional-functions) +- [x] [Time and Date Functions](./sql/scalar_functions.md#time-and-date-functions) +- [x] [Math Functions](./sql/scalar_functions.md#math-functions) +- [x] [Aggregate Functions](./sql/aggregate_functions.md) (`SUM`, `MEDIAN`, and many more) - [x] Schema Queries - [x] `SHOW TABLES` - [x] `SHOW COLUMNS FROM ` - [x] `SHOW CREATE TABLE ` - - [x] Basic SQL [Information Schema](./information_schema.md) (`TABLES`, `VIEWS`, `COLUMNS`) - - [ ] Full SQL [Information Schema](./information_schema.md) support -- [ ] Support for nested types (`ARRAY`/`LIST` and `STRUCT`. 
See [#2326](https://github.com/apache/datafusion/issues/2326) for details) + - [x] Basic SQL [Information Schema](./sql/information_schema.md) (`TABLES`, `VIEWS`, `COLUMNS`) + - [ ] Full SQL [Information Schema](./sql/information_schema.md) support +- [x] Support for nested types (`ARRAY`/`LIST` and `STRUCT`) - [x] Read support - [x] Write support - [x] Field access (`col['field']` and [`col[1]`]) - - [x] [Array Functions](./scalar_functions.md#array-functions) - - [ ] [Struct Functions](./scalar_functions.md#struct-functions) + - [x] [Array Functions](./sql/scalar_functions.md#array-functions) + - [x] [Struct Functions](./sql/scalar_functions.md#struct-functions) - [x] `struct` - [ ] [Postgres JSON operators](https://github.com/apache/datafusion/issues/6631) (`->`, `->>`, etc.) - [x] Subqueries @@ -73,12 +78,12 @@ - [x] Catalogs - [x] Schemas (`CREATE / DROP SCHEMA`) - [x] Tables (`CREATE / DROP TABLE`, `CREATE TABLE AS SELECT`) -- [ ] Data Insert +- [x] Data Insert - [x] `INSERT INTO` - - [ ] `COPY ..
INTO ..` - [x] CSV - - [ ] JSON - - [ ] Parquet + - [x] JSON + - [x] Parquet - [ ] Avro ## Runtime @@ -87,16 +92,22 @@ - [x] Streaming Window Evaluation - [x] Memory limits enforced - [x] Spilling (to disk) Sort -- [ ] Spilling (to disk) Grouping +- [x] Spilling (to disk) Grouping - [ ] Spilling (to disk) Joins ## Data Sources -In addition to allowing arbitrary datasources via the `TableProvider` +In addition to allowing arbitrary datasources via the [`TableProvider`] trait, DataFusion includes built in support for the following formats: - [x] CSV -- [x] Parquet (for all primitive and nested types) +- [x] Parquet + - [x] Primitive and Nested Types + - [x] Row Group and Data Page pruning on min/max statistics + - [x] Row Group pruning on Bloom Filters + - [x] Predicate push down (late materialization) [not by default](https://github.com/apache/datafusion/issues/3463) - [x] JSON - [x] Avro - [x] Arrow + +[`tableprovider`]: https://docs.rs/datafusion/latest/datafusion/catalog/trait.TableProvider.html diff --git a/docs/source/user-guide/sql/index.rst b/docs/source/user-guide/sql/index.rst index b7b8298f80d7..8e3f51bf8b0b 100644 --- a/docs/source/user-guide/sql/index.rst +++ b/docs/source/user-guide/sql/index.rst @@ -33,6 +33,5 @@ SQL Reference window_functions scalar_functions special_functions - sql_status write_options prepared_statements