diff --git a/rust/lance-index/src/scalar/inverted/builder.rs b/rust/lance-index/src/scalar/inverted/builder.rs index 6b99e7c917..2065a093e0 100644 --- a/rust/lance-index/src/scalar/inverted/builder.rs +++ b/rust/lance-index/src/scalar/inverted/builder.rs @@ -285,8 +285,7 @@ impl InvertedIndexBuilder { Result::Ok((batch, max_score)) } }); - let mut stream = - stream::iter(batches).buffer_unordered(get_num_compute_intensive_cpus()); + let mut stream = stream::iter(batches).buffered(get_num_compute_intensive_cpus()); let mut offsets = Vec::new(); let mut max_scores = Vec::new(); let mut num_rows = 0; diff --git a/rust/lance/src/index.rs b/rust/lance/src/index.rs index 76a9268720..7ac0b115d0 100644 --- a/rust/lance/src/index.rs +++ b/rust/lance/src/index.rs @@ -945,6 +945,7 @@ impl DatasetIndexInternalExt for Dataset { #[cfg(test)] mod tests { use crate::dataset::builder::DatasetBuilder; + use crate::dataset::optimize::{compact_files, CompactionOptions}; use crate::utils::test::{DatagenExt, FragmentCount, FragmentRowCount}; use super::*; @@ -1556,6 +1557,32 @@ mod tests { assert_eq!(texts.len(), 1, "query: {}, texts: {:?}", word, texts); assert_eq!(texts[0], word, "query: {}, texts: {:?}", word, texts); + + // we should be able to query the new words after compaction + compact_files(&mut dataset, CompactionOptions::default(), None) + .await + .unwrap(); + for &word in uppercase_words.iter() { + let query_result = dataset + .scan() + .project(&["text"]) + .unwrap() + .full_text_search(FullTextSearchQuery::new(word.to_string())) + .unwrap() + .try_into_batch() + .await + .unwrap(); + let texts = query_result["text"] + .as_string::() + .iter() + .map(|v| match v { + None => "".to_string(), + Some(v) => v.to_string(), + }) + .collect::>(); + assert_eq!(texts.len(), 1, "query: {}, texts: {:?}", word, texts); + assert_eq!(texts[0], word, "query: {}, texts: {:?}", word, texts); + } } }