Skip to content

Commit

Permalink
change to inverted_sparse
Browse files Browse the repository at this point in the history
Signed-off-by: Keming <[email protected]>
  • Loading branch information
kemingy committed Jul 29, 2024
1 parent 60a707f commit 3088f6c
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 45 deletions.
16 changes: 13 additions & 3 deletions crates/base/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ impl IndexOptions {
..
}),
) => Ok(()),
(VectorKind::SVecf32, DistanceKind::Dot, IndexingOptions::Inverted()) => Ok(()),
(VectorKind::SVecf32, DistanceKind::Dot, IndexingOptions::InvertedSparse(_)) => Ok(()),
_ => Err(ValidationError::new("not valid index options")),
}
}
Expand Down Expand Up @@ -261,7 +261,7 @@ pub enum IndexingOptions {
Flat(FlatIndexingOptions),
Ivf(IvfIndexingOptions),
Hnsw(HnswIndexingOptions),
Inverted(),
InvertedSparse(InvertedSparseIndexingOptions),
}

impl IndexingOptions {
Expand Down Expand Up @@ -297,11 +297,21 @@ impl Validate for IndexingOptions {
Self::Flat(x) => x.validate(),
Self::Ivf(x) => x.validate(),
Self::Hnsw(x) => x.validate(),
Self::Inverted() => Ok(()),
Self::InvertedSparse(_) => Ok(()),
}
}
}

/// Options for the `InvertedSparse` indexing method.
///
/// The struct currently has no fields. Because of `deny_unknown_fields`,
/// deserialization rejects any key supplied for it. `Default` is derived
/// rather than hand-written: for a field-less struct the derived impl is
/// exactly `Self {}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, Validate)]
#[serde(deny_unknown_fields)]
pub struct InvertedSparseIndexingOptions {}

#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[serde(deny_unknown_fields)]
pub struct FlatIndexingOptions {
Expand Down
16 changes: 8 additions & 8 deletions crates/index/src/indexing/sealed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ use base::operator::*;
use base::search::*;
use flat::Flat;
use hnsw::Hnsw;
use inverted::Inverted;
use inverted::InvertedSparse;
use ivf::Ivf;
use std::path::Path;

pub enum SealedIndexing<O: Op> {
Flat(Flat<O>),
Ivf(Ivf<O>),
Hnsw(Hnsw<O>),
Inverted(Inverted<O>),
InvertedSparse(InvertedSparse<O>),
}

impl<O: Op> SealedIndexing<O> {
Expand All @@ -25,7 +25,7 @@ impl<O: Op> SealedIndexing<O> {
IndexingOptions::Flat(_) => Self::Flat(Flat::create(path, options, source)),
IndexingOptions::Ivf(_) => Self::Ivf(Ivf::create(path, options, source)),
IndexingOptions::Hnsw(_) => Self::Hnsw(Hnsw::create(path, options, source)),
IndexingOptions::Inverted() => Self::Inverted(Inverted::create(path, options, source)),
IndexingOptions::InvertedSparse(_) => Self::InvertedSparse(InvertedSparse::create(path, options, source)),
}
}

Expand All @@ -34,7 +34,7 @@ impl<O: Op> SealedIndexing<O> {
IndexingOptions::Flat(_) => Self::Flat(Flat::open(path)),
IndexingOptions::Ivf(_) => Self::Ivf(Ivf::open(path)),
IndexingOptions::Hnsw(_) => Self::Hnsw(Hnsw::open(path)),
IndexingOptions::Inverted() => Self::Inverted(Inverted::open(path)),
IndexingOptions::InvertedSparse(_) => Self::InvertedSparse(InvertedSparse::open(path)),
}
}

Expand All @@ -47,7 +47,7 @@ impl<O: Op> SealedIndexing<O> {
SealedIndexing::Flat(x) => x.vbase(vector, opts),
SealedIndexing::Ivf(x) => x.vbase(vector, opts),
SealedIndexing::Hnsw(x) => x.vbase(vector, opts),
SealedIndexing::Inverted(x) => x.vbase(vector, opts),
SealedIndexing::InvertedSparse(x) => x.vbase(vector, opts),
}
}

Expand All @@ -56,7 +56,7 @@ impl<O: Op> SealedIndexing<O> {
SealedIndexing::Flat(x) => x.len(),
SealedIndexing::Ivf(x) => x.len(),
SealedIndexing::Hnsw(x) => x.len(),
SealedIndexing::Inverted(x) => x.len(),
SealedIndexing::InvertedSparse(x) => x.len(),
}
}

Expand All @@ -65,7 +65,7 @@ impl<O: Op> SealedIndexing<O> {
SealedIndexing::Flat(x) => x.vector(i),
SealedIndexing::Ivf(x) => x.vector(i),
SealedIndexing::Hnsw(x) => x.vector(i),
SealedIndexing::Inverted(x) => x.vector(i),
SealedIndexing::InvertedSparse(x) => x.vector(i),
}
}

Expand All @@ -74,7 +74,7 @@ impl<O: Op> SealedIndexing<O> {
SealedIndexing::Flat(x) => x.payload(i),
SealedIndexing::Ivf(x) => x.payload(i),
SealedIndexing::Hnsw(x) => x.payload(i),
SealedIndexing::Inverted(x) => x.payload(i),
SealedIndexing::InvertedSparse(x) => x.payload(i),
}
}
}
6 changes: 3 additions & 3 deletions crates/index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use common::dir_ops::sync_walk_from_dir;
use common::file_atomic::FileAtomic;
use crossbeam::atomic::AtomicCell;
use crossbeam::channel::Sender;
use inverted::operator::OperatorInverted;
use inverted::operator::OperatorInvertedSparse;
use ivf::operator::OperatorIvf;
use parking_lot::Mutex;
use quantization::operator::OperatorQuantization;
Expand All @@ -43,11 +43,11 @@ use thiserror::Error;
use validator::Validate;

pub trait Op:
Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInverted
Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInvertedSparse
{
}

impl<T: Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInverted> Op
impl<T: Operator + OperatorQuantization + OperatorStorage + OperatorIvf + OperatorInvertedSparse> Op
for T
{
}
Expand Down
2 changes: 1 addition & 1 deletion crates/index/src/segment/sealed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ impl<O: Op> SealedSegment<O> {
SealedIndexing::Flat(x) => x,
SealedIndexing::Ivf(x) => x,
SealedIndexing::Hnsw(x) => x,
SealedIndexing::Inverted(x) => x,
SealedIndexing::InvertedSparse(x) => x,
}
}
}
Expand Down
18 changes: 9 additions & 9 deletions crates/inverted/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

pub mod operator;

use self::operator::OperatorInverted;
use self::operator::OperatorInvertedSparse;
use base::index::{IndexOptions, SearchOptions};
use base::operator::Borrowed;
use base::scalar::{ScalarLike, F32};
Expand All @@ -20,15 +20,15 @@ use std::path::Path;
const ZERO: F32 = F32(0.0);

#[allow(dead_code)]
pub struct Inverted<O: OperatorInverted> {
pub struct InvertedSparse<O: OperatorInvertedSparse> {
storage: O::Storage,
payloads: MmapArray<Payload>,
indexes: Json<Vec<u32>>,
offsets: Json<Vec<u32>>,
scores: Json<Vec<F32>>,
}

impl<O: OperatorInverted> Inverted<O> {
impl<O: OperatorInvertedSparse> InvertedSparse<O> {
pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
let remapped = RemappedCollection::from_source(source);
from_nothing(path, options, &remapped)
Expand Down Expand Up @@ -83,12 +83,12 @@ impl<O: OperatorInverted> Inverted<O> {
}
}

fn from_nothing<O: OperatorInverted>(
fn from_nothing<O: OperatorInvertedSparse>(
path: impl AsRef<Path>,
_: IndexOptions,
collection: &impl Collection<O>,
) -> Inverted<O> {
create_dir(path.as_ref()).expect("failed to create path for inverted index");
) -> InvertedSparse<O> {
create_dir(path.as_ref()).expect("failed to create path for inverted sparse index");

let mut token_collection = BTreeMap::new();
for i in 0..collection.len() {
Expand All @@ -110,7 +110,7 @@ fn from_nothing<O: OperatorInverted>(
let json_offset = Json::create(path.as_ref().join("offsets"), offsets);
let json_score = Json::create(path.as_ref().join("scores"), scores);
sync_dir(path);
Inverted {
InvertedSparse {
storage,
payloads,
indexes: json_index,
Expand All @@ -119,13 +119,13 @@ fn from_nothing<O: OperatorInverted>(
}
}

fn open<O: OperatorInverted>(path: impl AsRef<Path>) -> Inverted<O> {
fn open<O: OperatorInvertedSparse>(path: impl AsRef<Path>) -> InvertedSparse<O> {
let storage = O::Storage::open(path.as_ref().join("storage"));
let payloads = MmapArray::open(path.as_ref().join("payloads"));
let offsets = Json::open(path.as_ref().join("offsets"));
let indexes = Json::open(path.as_ref().join("indexes"));
let scores = Json::open(path.as_ref().join("scores"));
Inverted {
InvertedSparse {
storage,
payloads,
indexes,
Expand Down
45 changes: 24 additions & 21 deletions crates/inverted/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,38 @@ use base::scalar::F32;
use quantization::operator::OperatorQuantization;
use storage::OperatorStorage;

pub trait OperatorInverted: OperatorQuantization + OperatorStorage {
fn to_index_vec(vec: Borrowed<'_, Self>) -> Vec<(u32, F32)>;
use std::iter::{Empty, zip};

/// Operator support required by the inverted sparse index.
///
/// Implementors must also satisfy `OperatorQuantization` and `OperatorStorage`.
pub trait OperatorInvertedSparse: OperatorQuantization + OperatorStorage {
/// Produces the `(u32, F32)` pairs of a borrowed vector as an iterator.
///
/// In this file only `SVecf32Dot` provides a real implementation (it pairs
/// `indexes()` with `values()`); the macro-generated impls panic via
/// `unimplemented!()`.
fn to_index_vec(vec: Borrowed<'_, Self>) -> impl Iterator<Item = (u32, F32)>;
}

impl OperatorInverted for SVecf32Dot {
fn to_index_vec(vec: Borrowed<'_, Self>) -> Vec<(u32, F32)> {
std::iter::zip(vec.indexes().to_vec(), vec.values().to_vec()).collect()
impl OperatorInvertedSparse for SVecf32Dot {
    /// Pairs every entry of `vec.indexes()` with the matching entry of
    /// `vec.values()`.
    ///
    /// Both slices are copied into owned `Vec`s first, so the returned
    /// iterator owns its data rather than borrowing from `vec`.
    fn to_index_vec(vec: Borrowed<'_, Self>) -> impl Iterator<Item = (u32, F32)> {
        // NOTE(review): the two copies might be avoidable with
        // `.iter().copied()` if the slices outlive `vec` — confirm the
        // lifetimes returned by `indexes()` / `values()` before changing.
        let dims = vec.indexes().to_vec();
        let weights = vec.values().to_vec();
        zip(dims, weights)
    }
}

macro_rules! unimpl_operator_inverted {
macro_rules! unimpl_operator_inverted_sparse {
($t:ty) => {
impl OperatorInverted for $t {
fn to_index_vec(_: Borrowed<'_, Self>) -> Vec<(u32, F32)> {
unimplemented!()
impl OperatorInvertedSparse for $t {
fn to_index_vec(_: Borrowed<'_, Self>) -> impl Iterator<Item = (u32, F32)> {
#![allow(unreachable_code)]
unimplemented!() as Empty<(u32, F32)>
}
}
};
}

unimpl_operator_inverted!(SVecf32Cos);
unimpl_operator_inverted!(SVecf32L2);
unimpl_operator_inverted!(BVecf32Cos);
unimpl_operator_inverted!(BVecf32Dot);
unimpl_operator_inverted!(BVecf32Jaccard);
unimpl_operator_inverted!(BVecf32L2);
unimpl_operator_inverted!(Vecf32Cos);
unimpl_operator_inverted!(Vecf32Dot);
unimpl_operator_inverted!(Vecf32L2);
unimpl_operator_inverted!(Vecf16Cos);
unimpl_operator_inverted!(Vecf16Dot);
unimpl_operator_inverted!(Vecf16L2);
unimpl_operator_inverted_sparse!(SVecf32Cos);
unimpl_operator_inverted_sparse!(SVecf32L2);
unimpl_operator_inverted_sparse!(BVecf32Cos);
unimpl_operator_inverted_sparse!(BVecf32Dot);
unimpl_operator_inverted_sparse!(BVecf32Jaccard);
unimpl_operator_inverted_sparse!(BVecf32L2);
unimpl_operator_inverted_sparse!(Vecf32Cos);
unimpl_operator_inverted_sparse!(Vecf32Dot);
unimpl_operator_inverted_sparse!(Vecf32L2);
unimpl_operator_inverted_sparse!(Vecf16Cos);
unimpl_operator_inverted_sparse!(Vecf16Dot);
unimpl_operator_inverted_sparse!(Vecf16L2);

0 comments on commit 3088f6c

Please sign in to comment.