Skip to content

Commit

Permalink
Use dafault BM25Similarity
Browse files Browse the repository at this point in the history
Signed-off-by: Prudhvi Godithi <[email protected]>
  • Loading branch information
prudhvigodithi committed Feb 9, 2025
1 parent 1673e74 commit 3c35909
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ teardown:


- match: { hits.total.value: 2 }
- match: { hits.hits.0._id: "3" }
- match: { hits.hits.0.inner_hits.question.hits.total.value: 0}
- match: { hits.hits.1._id: "2" }
- match: { hits.hits.1.inner_hits.question.hits.total.value: 1}
- match: { hits.hits.1.inner_hits.question.hits.hits.0._id: "1"}
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.0.inner_hits.question.hits.total.value: 1 }
- match: { hits.hits.0.inner_hits.question.hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "3" }
- match: { hits.hits.1.inner_hits.question.hits.total.value: 0 }

---
"HasParent disallow expensive queries":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
setup:
- do:
indices.create:
index: legacy_bm25_test
body:
settings:
number_of_shards: 1
number_of_replicas: 0
similarity:
default:
type: LegacyBM25
k1: 1.2
b: 0.75
mappings:
properties:
content:
type: text
- do:
index:
index: legacy_bm25_test
id: "1"
body: { "content": "This is a test document for legacy BM25 scoring" }
- do:
indices.refresh:
index: legacy_bm25_test

---
"Legacy BM25 search":
- do:
search:
index: legacy_bm25_test
body:
query:
match:
content: "legacy"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "1" }

---
teardown:
- do:
indices.delete:
index: legacy_bm25_test
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
Expand Down Expand Up @@ -62,6 +63,7 @@
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.apache.lucene.search.similarities.Similarity;
import org.opensearch.Version;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
Expand Down Expand Up @@ -271,13 +273,21 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett
}
}

public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);

float k1 = settings.getAsFloat("k1", 1.2f);
float b = settings.getAsFloat("b", 0.75f);
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);

return new BM25Similarity(k1, b, discountOverlaps);
}

public static Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) {
assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
float k1 = settings.getAsFloat("k1", 1.2f);
float b = settings.getAsFloat("b", 0.75f);
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
return new LegacyBM25Similarity(k1, b, discountOverlaps);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
Expand All @@ -52,7 +53,6 @@
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.lucene.similarity.LegacyBM25Similarity;
import org.opensearch.script.ScriptService;

import java.util.Collections;
Expand Down Expand Up @@ -84,7 +84,7 @@ public final class SimilarityService extends AbstractIndexComponent {
};
});
defaults.put("BM25", version -> {
final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
final Similarity similarity = new BM25Similarity();
return () -> similarity;
});
defaults.put("boolean", version -> {
Expand All @@ -100,6 +100,7 @@ public final class SimilarityService extends AbstractIndexComponent {
);
});
builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version));
builtIn.put("LegacyBM25", (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version));
builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version));
builtIn.put("DFR", (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version));
builtIn.put("IB", (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.opensearch.Version;
Expand All @@ -53,6 +54,13 @@ public void testDefaultSimilarity() {
Settings settings = Settings.builder().build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
}

public void testLegacySimilarity() {
Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25").build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.index.similarity;

import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.DFISimilarity;
Expand Down Expand Up @@ -72,7 +73,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

public void testResolveDefaultSimilarities() {
SimilarityService similarityService = createIndex("foo").similarityService();
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class));
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(org.apache.lucene.search.similarities.BM25Similarity.class));
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
assertThat(similarityService.getSimilarity("default"), equalTo(null));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic"));
Expand All @@ -83,7 +84,29 @@ public void testResolveDefaultSimilarities() {
);
}

public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException {
public void testResolveLegacySimilarity() throws IOException {
Settings settings = Settings.builder()
.put("index.similarity.my_similarity.type", "LegacyBM25")
.put("index.similarity.my_similarity.k1", 1.2f)
.put("index.similarity.my_similarity.b", 0.75f)
.put("index.similarity.my_similarity.discount_overlaps", false)
.build();

XContentBuilder mapping = XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("dummy")
.field("type", "text")
.field("similarity", "my_similarity")
.endObject()
.endObject()
.endObject();

MapperService mapperService = createIndex("foo", settings, "type", mapping).mapperService();
assertThat(mapperService.fieldType("dummy").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
}

public void testResolveSimilaritiesFromMapping_classicIsForbidden() {
Settings indexSettings = Settings.builder()
.put("index.similarity.my_similarity.type", "classic")
.put("index.similarity.my_similarity.discount_overlaps", false)
Expand Down Expand Up @@ -114,9 +137,12 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
.put("index.similarity.my_similarity.discount_overlaps", false)
.build();
MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService();
assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
assertThat(
mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(),
instanceOf(org.apache.lucene.search.similarities.BM25Similarity.class)
);

LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fieldType("field1")
BM25Similarity similarity = (org.apache.lucene.search.similarities.BM25Similarity) mapperService.fieldType("field1")
.getTextSearchInfo()
.getSimilarity()
.get();
Expand Down

0 comments on commit 3c35909

Please sign in to comment.