Skip to content

Commit

Permalink
build: bump up datafusion to version 41, arrow to 52.2 (#120)
Browse files Browse the repository at this point in the history
- Update the project dependencies to DataFusion 41.0.0 and Arrow 52.2.0.
- Bump the Rust version (Cargo `rust-version` and the pinned toolchain channel) from 1.75 to 1.76.
  • Loading branch information
yjshen authored Aug 29, 2024
1 parent 5bf117a commit f56603b
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 23 deletions.
36 changes: 18 additions & 18 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ resolver = "2"
version = "0.2.0"
edition = "2021"
license = "Apache-2.0"
rust-version = "1.75.0"
rust-version = "1.76"
keywords = ["apachehudi", "hudi", "datalake", "arrow"]
readme = "README.md"
description = "A native Rust library for Apache Hudi"
Expand All @@ -35,25 +35,25 @@ repository = "https://github.com/apache/hudi-rs"

[workspace.dependencies]
# arrow
arrow = { version = "= 52.0.0", features = ["pyarrow"] }
arrow-arith = { version = "= 52.0.0" }
arrow-array = { version = "= 52.0.0" }
arrow-buffer = { version = "= 52.0.0" }
arrow-cast = { version = "= 52.0.0" }
arrow-ipc = { version = "= 52.0.0" }
arrow-json = { version = "= 52.0.0" }
arrow-ord = { version = "= 52.0.0" }
arrow-row = { version = "= 52.0.0" }
arrow-schema = { version = "= 52.0.0", features = ["serde"] }
arrow-select = { version = "= 52.0.0" }
object_store = { version = "= 0.10.1", features = ["aws", "azure", "gcp"] }
parquet = { version = "= 52.0.0", features = ["async", "object_store"] }
arrow = { version = "= 52.2.0", features = ["pyarrow"] }
arrow-arith = { version = "= 52.2.0" }
arrow-array = { version = "= 52.2.0" }
arrow-buffer = { version = "= 52.2.0" }
arrow-cast = { version = "= 52.2.0" }
arrow-ipc = { version = "= 52.2.0" }
arrow-json = { version = "= 52.2.0" }
arrow-ord = { version = "= 52.2.0" }
arrow-row = { version = "= 52.2.0" }
arrow-schema = { version = "= 52.2.0", features = ["serde"] }
arrow-select = { version = "= 52.2.0" }
object_store = { version = "= 0.10.2", features = ["aws", "azure", "gcp"] }
parquet = { version = "= 52.2.0", features = ["async", "object_store"] }

# datafusion
datafusion = { version = "= 39.0.0" }
datafusion-expr = { version = "= 39.0.0" }
datafusion-common = { version = "= 39.0.0" }
datafusion-physical-expr = { version = "= 39.0.0" }
datafusion = { version = "= 41.0.0" }
datafusion-expr = { version = "= 41.0.0" }
datafusion-common = { version = "= 41.0.0" }
datafusion-physical-expr = { version = "= 41.0.0" }

# serde
serde = { version = "1.0.203", features = ["derive"] }
Expand Down
2 changes: 1 addition & 1 deletion crates/core/src/table/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ mod tests {
.get_schema()
.await
.unwrap()
.all_fields()
.flattened_fields()
.into_iter()
.map(|f| f.name().to_string())
.collect();
Expand Down
11 changes: 8 additions & 3 deletions crates/datafusion/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ use std::thread;

use arrow_schema::SchemaRef;
use async_trait::async_trait;
use datafusion::catalog::Session;
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder;
use datafusion::datasource::physical_plan::FileScanConfig;
use datafusion::datasource::TableProvider;
use datafusion::execution::context::SessionState;
use datafusion::physical_plan::ExecutionPlan;
use datafusion_common::config::TableParquetOptions;
use datafusion_common::DFSchema;
use datafusion_common::DataFusionError::Execution;
use datafusion_common::Result;
Expand Down Expand Up @@ -92,7 +93,7 @@ impl TableProvider for HudiDataSource {

async fn scan(
&self,
state: &SessionState,
state: &dyn Session,
projection: Option<&Vec<usize>>,
filters: &[Expr],
limit: Option<usize>,
Expand Down Expand Up @@ -123,7 +124,11 @@ impl TableProvider for HudiDataSource {
.with_projection(projection.cloned())
.with_limit(limit);

let parquet_opts = state.table_options().parquet.clone();
let parquet_opts = TableParquetOptions {
global: state.config_options().execution.parquet.clone(),
column_specific_options: Default::default(),
key_value_metadata: Default::default(),
};
let mut exec_builder = ParquetExecBuilder::new_with_options(fsc, parquet_opts);

let filter = filters.iter().cloned().reduce(|acc, new| acc.and(new));
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
# under the License.

[toolchain]
channel = "1.75"
channel = "1.76"
components = ["rustfmt", "clippy"]
profile = "minimal"

0 comments on commit f56603b

Please sign in to comment.