-
Notifications
You must be signed in to change notification settings - Fork 140
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce RemoteIndexBuilder skeleton
Signed-off-by: Jay Deng <[email protected]>
- Loading branch information
Showing
21 changed files
with
2,460 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
120 changes: 120 additions & 0 deletions
120
...n/java/org/opensearch/knn/index/codec/KNN10010Codec/KNN10010PerFieldKnnVectorsFormat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.KNN10010Codec; | ||
|
||
import org.apache.lucene.codecs.KnnVectorsFormat; | ||
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; | ||
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; | ||
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; | ||
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; | ||
import org.opensearch.common.Nullable; | ||
import org.opensearch.common.collect.Tuple; | ||
import org.opensearch.index.mapper.MapperService; | ||
import org.opensearch.knn.index.KNNSettings; | ||
import org.opensearch.knn.index.SpaceType; | ||
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat; | ||
import org.opensearch.knn.index.codec.KNN9120Codec.KNN9120HnswBinaryVectorsFormat; | ||
import org.opensearch.knn.index.engine.KNNEngine; | ||
import org.opensearch.knn.index.remote.RemoteIndexBuilder; | ||
|
||
import java.util.Optional; | ||
import java.util.concurrent.ExecutorService; | ||
import java.util.concurrent.Executors; | ||
|
||
/** | ||
* Class provides per field format implementation for Lucene Knn vector type | ||
*/ | ||
public class KNN10010PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat { | ||
private static final Tuple<Integer, ExecutorService> DEFAULT_MERGE_THREAD_COUNT_AND_EXECUTOR_SERVICE = Tuple.tuple(1, null); | ||
@Nullable | ||
private RemoteIndexBuilder remoteIndexBuilder; | ||
|
||
public KNN10010PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService, final RemoteIndexBuilder remoteIndexBuilder) { | ||
super( | ||
mapperService, | ||
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, | ||
Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, | ||
Lucene99HnswVectorsFormat::new, | ||
knnVectorsFormatParams -> { | ||
final Tuple<Integer, ExecutorService> mergeThreadCountAndExecutorService = getMergeThreadCountAndExecutorService(); | ||
// There is an assumption here that hamming space will only be used for binary vectors. This will need to be fixed if that | ||
// changes in the future. | ||
if (knnVectorsFormatParams.getSpaceType() == SpaceType.HAMMING) { | ||
return new KNN9120HnswBinaryVectorsFormat( | ||
knnVectorsFormatParams.getMaxConnections(), | ||
knnVectorsFormatParams.getBeamWidth(), | ||
// number of merge threads | ||
mergeThreadCountAndExecutorService.v1(), | ||
// executor service | ||
mergeThreadCountAndExecutorService.v2() | ||
); | ||
} else { | ||
return new Lucene99HnswVectorsFormat( | ||
knnVectorsFormatParams.getMaxConnections(), | ||
knnVectorsFormatParams.getBeamWidth(), | ||
// number of merge threads | ||
mergeThreadCountAndExecutorService.v1(), | ||
// executor service | ||
mergeThreadCountAndExecutorService.v2() | ||
); | ||
} | ||
}, | ||
knnScalarQuantizedVectorsFormatParams -> { | ||
final Tuple<Integer, ExecutorService> mergeThreadCountAndExecutorService = getMergeThreadCountAndExecutorService(); | ||
return new Lucene99HnswScalarQuantizedVectorsFormat( | ||
knnScalarQuantizedVectorsFormatParams.getMaxConnections(), | ||
knnScalarQuantizedVectorsFormatParams.getBeamWidth(), | ||
// Number of merge threads | ||
mergeThreadCountAndExecutorService.v1(), | ||
knnScalarQuantizedVectorsFormatParams.getBits(), | ||
knnScalarQuantizedVectorsFormatParams.isCompressFlag(), | ||
knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(), | ||
// Executor service | ||
mergeThreadCountAndExecutorService.v2() | ||
); | ||
} | ||
); | ||
this.remoteIndexBuilder = remoteIndexBuilder; | ||
} | ||
|
||
public KNN10010PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) { | ||
this(mapperService, null); | ||
} | ||
|
||
/** | ||
* This method returns the maximum dimension allowed from KNNEngine for Lucene codec | ||
* | ||
* @param fieldName Name of the field, ignored | ||
* @return Maximum constant dimension set by KNNEngine | ||
*/ | ||
@Override | ||
public int getMaxDimensions(String fieldName) { | ||
return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE); | ||
} | ||
|
||
private static Tuple<Integer, ExecutorService> getMergeThreadCountAndExecutorService() { | ||
// To ensure that only once we are fetching the settings per segment, we are fetching the num threads once while | ||
// creating the executors | ||
int mergeThreadCount = KNNSettings.getIndexThreadQty(); | ||
// We need to return null whenever the merge threads are <=1, as lucene assumes that if number of threads are 1 | ||
// then we should be giving a null value of the executor | ||
if (mergeThreadCount <= 1) { | ||
return DEFAULT_MERGE_THREAD_COUNT_AND_EXECUTOR_SERVICE; | ||
} else { | ||
return Tuple.tuple(mergeThreadCount, Executors.newFixedThreadPool(mergeThreadCount)); | ||
} | ||
} | ||
|
||
@Override | ||
protected KnnVectorsFormat nativeEngineVectorsFormat() { | ||
int approximateThreshold = getApproximateThresholdValue(); | ||
return new NativeEngines10010KnnVectorsFormat( | ||
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()), | ||
approximateThreshold, | ||
remoteIndexBuilder | ||
); | ||
} | ||
} |
112 changes: 112 additions & 0 deletions
112
...java/org/opensearch/knn/index/codec/KNN10010Codec/NativeEngines10010KnnVectorsFormat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
* | ||
* Modifications Copyright OpenSearch Contributors. See | ||
* GitHub history for details. | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.KNN10010Codec; | ||
|
||
import org.apache.lucene.codecs.KnnVectorsFormat; | ||
import org.apache.lucene.codecs.KnnVectorsReader; | ||
import org.apache.lucene.codecs.KnnVectorsWriter; | ||
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; | ||
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; | ||
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; | ||
import org.apache.lucene.index.SegmentReadState; | ||
import org.apache.lucene.index.SegmentWriteState; | ||
import org.opensearch.common.Nullable; | ||
import org.opensearch.knn.index.KNNSettings; | ||
import org.opensearch.knn.index.codec.KNN990Codec.NativeEngines990KnnVectorsReader; | ||
import org.opensearch.knn.index.engine.KNNEngine; | ||
import org.opensearch.knn.index.remote.RemoteIndexBuilder; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* This is a Vector format that will be used for Native engines like Faiss and Nmslib for reading and writing vector | ||
* related data structures. | ||
*/ | ||
public class NativeEngines10010KnnVectorsFormat extends KnnVectorsFormat { | ||
/** The format for storing, reading, merging vectors on disk */ | ||
private static FlatVectorsFormat flatVectorsFormat; | ||
private static final String FORMAT_NAME = "NativeEngines10010KnnVectorsFormat"; | ||
private static int approximateThreshold; | ||
@Nullable | ||
private final RemoteIndexBuilder remoteIndexBuilder; | ||
|
||
// For Testing Only | ||
public NativeEngines10010KnnVectorsFormat() { | ||
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer())); | ||
} | ||
|
||
// For Testing Only | ||
public NativeEngines10010KnnVectorsFormat(int approximateThreshold) { | ||
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()), approximateThreshold, null); | ||
} | ||
|
||
// For Testing Only | ||
public NativeEngines10010KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat) { | ||
this(flatVectorsFormat, KNNSettings.INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE, null); | ||
} | ||
|
||
public NativeEngines10010KnnVectorsFormat( | ||
final FlatVectorsFormat flatVectorsFormat, | ||
int approximateThreshold, | ||
RemoteIndexBuilder remoteIndexBuilder | ||
) { | ||
super(FORMAT_NAME); | ||
NativeEngines10010KnnVectorsFormat.flatVectorsFormat = flatVectorsFormat; | ||
NativeEngines10010KnnVectorsFormat.approximateThreshold = approximateThreshold; | ||
this.remoteIndexBuilder = remoteIndexBuilder; | ||
} | ||
|
||
/** | ||
* Returns a {@link org.apache.lucene.codecs.KnnVectorsWriter} to write the vectors to the index. | ||
* | ||
* @param state {@link org.apache.lucene.index.SegmentWriteState} | ||
*/ | ||
@Override | ||
public KnnVectorsWriter fieldsWriter(final SegmentWriteState state) throws IOException { | ||
return new NativeEngines10010KnnVectorsWriter( | ||
state, | ||
flatVectorsFormat.fieldsWriter(state), | ||
approximateThreshold, | ||
remoteIndexBuilder | ||
); | ||
} | ||
|
||
/** | ||
* Returns a {@link org.apache.lucene.codecs.KnnVectorsReader} to read the vectors from the index. | ||
* | ||
* @param state {@link org.apache.lucene.index.SegmentReadState} | ||
*/ | ||
@Override | ||
public KnnVectorsReader fieldsReader(final SegmentReadState state) throws IOException { | ||
return new NativeEngines990KnnVectorsReader(state, flatVectorsFormat.fieldsReader(state)); | ||
} | ||
|
||
/** | ||
* @param s | ||
* @return | ||
*/ | ||
@Override | ||
public int getMaxDimensions(String s) { | ||
return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "NativeEngines99KnnVectorsFormat(name=" | ||
+ this.getClass().getSimpleName() | ||
+ ", flatVectorsFormat=" | ||
+ flatVectorsFormat | ||
+ ", approximateThreshold=" | ||
+ approximateThreshold | ||
+ ")"; | ||
} | ||
} |
Oops, something went wrong.