Skip to content

Commit

Permalink
introduce jVector to the supported KNN engines
Browse files Browse the repository at this point in the history
  • Loading branch information
sam-herman committed Feb 7, 2025
1 parent 8374b8f commit 125e473
Show file tree
Hide file tree
Showing 39 changed files with 2,582 additions and 90 deletions.
18 changes: 15 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,10 @@ dependencies {
}
testFixturesImplementation "org.opensearch:common-utils:${version}"
implementation 'com.github.oshi:oshi-core:6.4.13'

implementation 'io.github.jbellis:jvector:4.0.0-beta.2-SNAPSHOT'
implementation 'org.agrona:agrona:1.20.0'

api "net.java.dev.jna:jna:5.13.0"
api "net.java.dev.jna:jna-platform:5.13.0"
// OpenSearch core is using slf4j 1.7.36. Therefore, we cannot change the version here.
Expand All @@ -331,7 +335,7 @@ task windowsPatches(type:Exec) {
task cmakeJniLib(type:Exec) {
workingDir 'jni'
def args = []
args.add("cmake")
args.add("/opt/homebrew/bin/cmake")
args.add(".")
args.add("-DKNN_PLUGIN_VERSION=${opensearch_version}")
args.add("-DAVX2_ENABLED=${avx2_enabled}")
Expand Down Expand Up @@ -364,6 +368,8 @@ test {
dependsOn buildJniLib
systemProperty 'tests.security.manager', 'false'
systemProperty "java.library.path", "$rootDir/jni/release"
systemProperty 'log4j.configurationFile', "$rootDir/src/test/resources/log4j2.properties"

//this change enables mockito-inline that supports mocking of static classes/calls
systemProperty "jdk.attach.allowAttachSelf", true
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
Expand All @@ -378,6 +384,11 @@ integTest {
dependsOn buildJniLib
}
systemProperty 'tests.security.manager', 'false'
println "Project root directory: ${project.rootDir}"
systemProperty "java.security.policy", "file://${project.rootDir}/src/main/plugin-metadata/plugin-security.policy"
systemProperty 'log4j.configurationFile', "${project.rootDir}/src/test/resources/log4j2.properties"
testLogging.showStandardStreams = true
systemProperty 'tests.output', 'true'
systemProperty 'java.io.tmpdir', opensearch_tmp_dir.absolutePath
systemProperty "java.library.path", "$rootDir/jni/release"
// allows integration test classes to access test resource from project root path
Expand Down Expand Up @@ -421,7 +432,8 @@ integTest {

testClusters.integTest {
testDistribution = "ARCHIVE"

systemProperty "java.security.policy", "file://${project.rootDir}/src/main/plugin-metadata/plugin-security.policy"
systemProperty 'log4j.configurationFile', "${project.rootDir}/src/test/resources/log4j2.properties"
// Optionally install security
if (System.getProperty("security.enabled") != null) {
configureSecurityPlugin(testClusters.integTest)
Expand Down Expand Up @@ -460,7 +472,7 @@ task integTestRemote(type: RestIntegTestTask) {
systemProperty 'cluster.number_of_nodes', "${_numNodes}"

systemProperty 'tests.security.manager', 'false'

systemProperty 'tests.output', 'true'
// Run tests with remote cluster only if rest case is defined
if (System.getProperty("tests.rest.cluster") != null) {
filter {
Expand Down
77 changes: 77 additions & 0 deletions demo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/bin/bash
#
# End-to-end demo of the jVector KNN engine against a local test cluster.
#
# Steps: start a single-node cluster with `./gradlew run`, create a KNN index,
# map a knn_vector field that uses the "jvector" engine with the "disk_ann"
# method, index three vectors, run a k-NN query, delete the index, and list
# the on-disk index directory of the test cluster.
#
# Environment:
#   CUSTOM_DISTRIBUTION_URL  Optional. URL of the OpenSearch min distribution
#                            passed to Gradle as -PcustomDistributionUrl.
#                            Defaults to the locally built darwin-x64 archive
#                            under ${HOME}/projects/OpenSearch.

# Default kept identical to the previously hard-coded location so existing
# setups keep working; override it when the archive lives elsewhere.
DISTRIBUTION_URL="${CUSTOM_DISTRIBUTION_URL:-file://${HOME}/projects/OpenSearch/distribution/archives/darwin-tar/build/distributions/opensearch-min-3.0.0-SNAPSHOT-darwin-x64.tar.gz}"
CLUSTER_URL="localhost:9200"

# `gradlew run` keeps running for as long as the cluster is up, so it has to
# be started in the background; otherwise none of the requests below would be
# sent until the cluster had already been shut down.
./gradlew run -PcustomDistributionUrl="${DISTRIBUTION_URL}" &
GRADLE_PID=$!

# Stop the background cluster when the script exits (normally or on error).
# A plain Ctrl-C is not enough: background jobs of a non-interactive shell
# ignore SIGINT, so the process is terminated explicitly.
cleanup() {
    if kill -0 "${GRADLE_PID}" 2>/dev/null; then
        kill "${GRADLE_PID}" 2>/dev/null
        wait "${GRADLE_PID}" 2>/dev/null
    fi
}
trap cleanup EXIT

# Wait until the cluster answers HTTP requests. Bail out if Gradle dies first
# (build failure, missing distribution archive, ...), instead of polling forever.
until curl --silent --fail --output /dev/null "${CLUSTER_URL}"; do
    if ! kill -0 "${GRADLE_PID}" 2>/dev/null; then
        echo "ERROR: './gradlew run' exited before the cluster became available" >&2
        exit 1
    fi
    sleep 2
done

# ping local cluster
curl "${CLUSTER_URL}"

# Check test cluster status
curl -X GET "http://${CLUSTER_URL}/_cluster/health?pretty"

# Create new knn index with 1 shard and 0 replicas
curl -X PUT "${CLUSTER_URL}/my_knn_index?pretty" -H 'Content-Type: application/json' -d'
{
  "settings": {
    "index.knn": true,
    "index.number_of_shards": 1,
    "index.number_of_replicas": 0,
    "index.use_compound_file": false
  }
}'

# Check index settings
curl -X GET "${CLUSTER_URL}/my_knn_index/_settings?pretty"

# Add mapping for knn_vector field with jVector engine
curl -X PUT "${CLUSTER_URL}/my_knn_index/_mapping?pretty" -H 'Content-Type: application/json' -d'
{
  "properties": {
    "my_vector": {
      "type": "knn_vector",
      "dimension": 3,
      "method": {
        "name": "disk_ann",
        "space_type": "l2",
        "engine": "jvector"
      }
    }
  }
}'

# Check index mapping
curl -X GET "${CLUSTER_URL}/my_knn_index/_mapping?pretty"

# Add documents with knn_vector field.
# The bulk body is newline-delimited JSON: one action line followed by one
# document line, and the payload must end with a newline. Do not re-indent it.
curl -X POST "${CLUSTER_URL}/_bulk?pretty" -H 'Content-Type: application/json' -d'
{"index": {"_index": "my_knn_index"}}
{"my_vector": [1, 2, 3]}
{"index": {"_index": "my_knn_index"}}
{"my_vector": [4, 5, 6]}
{"index": {"_index": "my_knn_index"}}
{"my_vector": [7, 8, 9]}
'

# Refresh index so the documents just indexed become searchable
curl -X POST "${CLUSTER_URL}/my_knn_index/_refresh?pretty"

# Search for nearest neighbors
curl -X GET "${CLUSTER_URL}/my_knn_index/_search?pretty" -H 'Content-Type: application/json' -d'
{
  "query": {
    "knn": {
      "my_vector": {
        "vector": [1, 2, 3],
        "k": 3
      }
    }
  }
}'

# Delete index
curl -X DELETE "${CLUSTER_URL}/my_knn_index?pretty"

# Check test cluster location
ls -lah build/testclusters/integTest-0/data/nodes/0/indices
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
9 changes: 2 additions & 7 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#

distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionSha256Sum=f2b9ed0faf8472cbe469255ae6c86eddb77076c75191741b4a462f33128dd419
distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
11 changes: 5 additions & 6 deletions gradlew
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
#!/bin/sh
#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#

#
# Copyright © 2015-2021 the original authors.
Expand All @@ -19,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#

##############################################################################
#
Expand Down Expand Up @@ -59,7 +57,7 @@
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
Expand Down Expand Up @@ -88,7 +86,8 @@ done
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
' "$PWD" ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
Expand Down
26 changes: 12 additions & 14 deletions gradlew.bat
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
@rem
@rem Copyright OpenSearch Contributors
@rem SPDX-License-Identifier: Apache-2.0
@rem
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -17,6 +13,8 @@
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem

@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
Expand Down Expand Up @@ -47,11 +45,11 @@ set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
echo. 1>&2
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2

goto fail

Expand All @@ -61,11 +59,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
echo. 1>&2
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2

goto fail

Expand Down
4 changes: 4 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public class KNNConstants {
public static final String NAME = "name";
public static final String PARAMETERS = "parameters";
public static final String METHOD_HNSW = "hnsw";
public static final String DISK_ANN = "disk_ann";
public static final String TYPE = "type";
public static final String TYPE_NESTED = "nested";
public static final String PATH = "path";
Expand Down Expand Up @@ -121,6 +122,9 @@ public class KNNConstants {
public static final String FAISS_SIGNED_BYTE_SQ = "SQ8_direct_signed";
public static final String FAISS_SQ_CLIP = "clip";

// JVector specific constants
public static final String JVECTOR_NAME = "jvector";

// Parameter defaults/limits
public static final Integer ENCODER_PARAMETER_PQ_CODE_COUNT_DEFAULT = 1;
public static final Integer ENCODER_PARAMETER_PQ_CODE_COUNT_LIMIT = 1024;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.codec.KNN990Codec.NativeEngines990KnnVectorsFormat;
import org.opensearch.knn.index.codec.jvector.JVectorFormat;
import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams;
import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams;
import org.opensearch.knn.index.engine.KNNEngine;
Expand All @@ -24,6 +25,7 @@

import java.util.Map;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Supplier;

Expand All @@ -42,7 +44,7 @@ public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFor
private final int defaultMaxConnections;
private final int defaultBeamWidth;
private final Supplier<KnnVectorsFormat> defaultFormatSupplier;
private final Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier;
private final BiFunction<KNNEngine, KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier;
private Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier;
private static final String MAX_CONNECTIONS = "max_connections";
private static final String BEAM_WIDTH = "beam_width";
Expand All @@ -52,7 +54,7 @@ public BasePerFieldKnnVectorsFormat(
int defaultMaxConnections,
int defaultBeamWidth,
Supplier<KnnVectorsFormat> defaultFormatSupplier,
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier
BiFunction<KNNEngine,KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier
) {
this.mapperService = mapperService;
this.defaultMaxConnections = defaultMaxConnections;
Expand Down Expand Up @@ -89,50 +91,52 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
.orElseThrow(() -> new IllegalArgumentException("KNN method context cannot be empty"));
final KNNEngine engine = knnMethodContext.getKnnEngine();
final Map<String, Object> params = knnMethodContext.getMethodComponentContext().getParameters();
switch (engine) {
// All Java engines to use Lucene extensions directly
case JVECTOR:
case LUCENE:
if (params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) {
KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams(
params,
defaultMaxConnections,
defaultBeamWidth
);
if (knnScalarQuantizedVectorsFormatParams.validate(params)) {
log.debug(
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\"",
field,
MAX_CONNECTIONS,
knnScalarQuantizedVectorsFormatParams.getMaxConnections(),
BEAM_WIDTH,
knnScalarQuantizedVectorsFormatParams.getBeamWidth(),
LUCENE_SQ_CONFIDENCE_INTERVAL,
knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(),
LUCENE_SQ_BITS,
knnScalarQuantizedVectorsFormatParams.getBits()
);
return scalarQuantizedVectorsFormatSupplier.apply(knnScalarQuantizedVectorsFormatParams);
}
}

if (engine == KNNEngine.LUCENE) {
if (params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) {
KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams(
params,
defaultMaxConnections,
defaultBeamWidth
KNNVectorsFormatParams knnVectorsFormatParams = new KNNVectorsFormatParams(
params,
defaultMaxConnections,
defaultBeamWidth,
knnMethodContext.getSpaceType()
);
if (knnScalarQuantizedVectorsFormatParams.validate(params)) {
log.debug(
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\"",
log.debug(
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\" and [{}] = \"{}\"",
field,
MAX_CONNECTIONS,
knnScalarQuantizedVectorsFormatParams.getMaxConnections(),
knnVectorsFormatParams.getMaxConnections(),
BEAM_WIDTH,
knnScalarQuantizedVectorsFormatParams.getBeamWidth(),
LUCENE_SQ_CONFIDENCE_INTERVAL,
knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(),
LUCENE_SQ_BITS,
knnScalarQuantizedVectorsFormatParams.getBits()
);
return scalarQuantizedVectorsFormatSupplier.apply(knnScalarQuantizedVectorsFormatParams);
}
}

KNNVectorsFormatParams knnVectorsFormatParams = new KNNVectorsFormatParams(
params,
defaultMaxConnections,
defaultBeamWidth,
knnMethodContext.getSpaceType()
);
log.debug(
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\" and [{}] = \"{}\"",
field,
MAX_CONNECTIONS,
knnVectorsFormatParams.getMaxConnections(),
BEAM_WIDTH,
knnVectorsFormatParams.getBeamWidth()
);
return vectorsFormatSupplier.apply(knnVectorsFormatParams);
knnVectorsFormatParams.getBeamWidth()
);
return vectorsFormatSupplier.apply(engine, knnVectorsFormatParams);
default:
// All native engines to use NativeEngines990KnnVectorsFormat
return nativeEngineVectorsFormat();
}

// All native engines to use NativeEngines990KnnVectorsFormat
return nativeEngineVectorsFormat();
}

private NativeEngines990KnnVectorsFormat nativeEngineVectorsFormat() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.KNNCodecVersion;
import org.opensearch.knn.index.codec.KNNFormatFacade;

Expand Down
Loading

0 comments on commit 125e473

Please sign in to comment.