Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use default as BM25Similarity #17306

Merged
merged 1 commit into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG-3.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Refactor the codebase to eliminate top level split packages for JPMS support ([#17153](https://github.com/opensearch-project/OpenSearch/pull/17153)
- Refactor `:server` module `org.apacge.lucene` package to eliminate top level split packages for JPMS support ([#17241](https://github.com/opensearch-project/OpenSearch/pull/17241))
- Refactor the `:server` module `org.opensearch.client` to `org.opensearch.transport.client` to eliminate top level split packages for JPMS support ([#17272](https://github.com/opensearch-project/OpenSearch/pull/17272))
- Use Lucene `BM25Similarity` as default since the `LegacyBM25Similarity` is marked as deprecated ([#17306](https://github.com/opensearch-project/OpenSearch/pull/17306))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ teardown:


- match: { hits.total.value: 2 }
- match: { hits.hits.0._id: "3" }
- match: { hits.hits.0.inner_hits.question.hits.total.value: 0}
- match: { hits.hits.1._id: "2" }
- match: { hits.hits.1.inner_hits.question.hits.total.value: 1}
- match: { hits.hits.1.inner_hits.question.hits.hits.0._id: "1"}
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.0.inner_hits.question.hits.total.value: 1 }
- match: { hits.hits.0.inner_hits.question.hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "3" }
- match: { hits.hits.1.inner_hits.question.hits.total.value: 0 }
msfroh marked this conversation as resolved.
Show resolved Hide resolved

---
"HasParent disallow expensive queries":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
setup:
- do:
indices.create:
index: legacy_bm25_test
body:
settings:
number_of_shards: 1
number_of_replicas: 0
similarity:
default:
type: LegacyBM25
k1: 1.2
b: 0.75
mappings:
properties:
content:
type: text
- do:
index:
index: legacy_bm25_test
id: "1"
body: { "content": "This is a test document for legacy BM25 scoring" }
- do:
index:
index: legacy_bm25_test
id: "2"
body: { "content": "legacy legacy legacy scoring" }
- do:
indices.refresh:
index: legacy_bm25_test

---
"Legacy BM25 search":
- do:
search:
index: legacy_bm25_test
body:
query:
match:
content: "legacy"
- match: { hits.total.value: 2 }
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.1._id: "1" }

---
teardown:
- do:
indices.delete:
index: legacy_bm25_test
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures;
import static org.opensearch.transport.client.Requests.createIndexRequest;
import static org.opensearch.transport.client.Requests.searchRequest;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.notNullValue;
Expand Down Expand Up @@ -179,12 +180,12 @@ public void testDfsQueryThenFetch() throws Exception {
SearchHit hit = hits[i];
assertThat(hit.getExplanation(), notNullValue());
assertThat(hit.getExplanation().getDetails().length, equalTo(1));
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), startsWith("n,"));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), startsWith("N,"));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDescription(), startsWith("idf"));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDescription(), containsString("freq"));
assertThat(
"id[" + hit.getId() + "] -> " + hit.getExplanation().toString(),
hit.getId(),
Expand Down Expand Up @@ -221,12 +222,12 @@ public void testDfsQueryThenFetchWithSort() throws Exception {
SearchHit hit = hits[i];
assertThat(hit.getExplanation(), notNullValue());
assertThat(hit.getExplanation().getDetails().length, equalTo(1));
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), startsWith("n,"));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), startsWith("N,"));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDescription(), startsWith("idf"));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L));
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDescription(), containsString("freq"));
assertThat("id[" + hit.getId() + "]", hit.getId(), equalTo(Integer.toString(total + i)));
}
total += hits.length;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ public void testExplainWithSingleDoc() throws Exception {
assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L));
Explanation explanation = searchResponse.getHits().getHits()[0].getExplanation();
assertThat(explanation.getValue(), equalTo(searchResponse.getHits().getHits()[0].getScore()));
assertThat(explanation.toString(), startsWith("0.36464313 = Score based on 2 child docs in range from 0 to 1"));
assertThat(explanation.toString(), startsWith("0.16574687 = Score based on 2 child docs in range from 0 to 1"));
msfroh marked this conversation as resolved.
Show resolved Hide resolved
}

public void testSimpleNestedSorting() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
Expand Down Expand Up @@ -62,6 +63,7 @@
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.apache.lucene.search.similarities.Similarity;
import org.opensearch.Version;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
Expand Down Expand Up @@ -271,13 +273,21 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett
}
}

public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);

float k1 = settings.getAsFloat("k1", 1.2f);
float b = settings.getAsFloat("b", 0.75f);
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);

return new BM25Similarity(k1, b, discountOverlaps);
}

public static Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) {
msfroh marked this conversation as resolved.
Show resolved Hide resolved
assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
float k1 = settings.getAsFloat("k1", 1.2f);
float b = settings.getAsFloat("b", 0.75f);
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
return new LegacyBM25Similarity(k1, b, discountOverlaps);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
Expand All @@ -52,7 +53,6 @@
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.lucene.similarity.LegacyBM25Similarity;
import org.opensearch.script.ScriptService;

import java.util.Collections;
Expand Down Expand Up @@ -84,7 +84,7 @@ public final class SimilarityService extends AbstractIndexComponent {
};
});
defaults.put("BM25", version -> {
final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
final Similarity similarity = new BM25Similarity();
return () -> similarity;
});
defaults.put("boolean", version -> {
Expand All @@ -100,6 +100,7 @@ public final class SimilarityService extends AbstractIndexComponent {
);
});
builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version));
builtIn.put("LegacyBM25", (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version));
msfroh marked this conversation as resolved.
Show resolved Hide resolved
builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version));
builtIn.put("DFR", (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version));
builtIn.put("IB", (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.opensearch.Version;
Expand All @@ -53,6 +54,13 @@ public void testDefaultSimilarity() {
Settings settings = Settings.builder().build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
}

public void testLegacySimilarity() {
Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25").build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.index.similarity;

import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.DFISimilarity;
Expand Down Expand Up @@ -72,7 +73,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

public void testResolveDefaultSimilarities() {
SimilarityService similarityService = createIndex("foo").similarityService();
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class));
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
assertThat(similarityService.getSimilarity("default"), equalTo(null));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic"));
Expand All @@ -83,7 +84,29 @@ public void testResolveDefaultSimilarities() {
);
}

public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException {
public void testResolveLegacySimilarity() throws IOException {
Settings settings = Settings.builder()
.put("index.similarity.my_similarity.type", "LegacyBM25")
.put("index.similarity.my_similarity.k1", 1.2f)
.put("index.similarity.my_similarity.b", 0.75f)
.put("index.similarity.my_similarity.discount_overlaps", false)
.build();

XContentBuilder mapping = XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("dummy")
.field("type", "text")
.field("similarity", "my_similarity")
.endObject()
.endObject()
.endObject();

MapperService mapperService = createIndex("foo", settings, "type", mapping).mapperService();
assertThat(mapperService.fieldType("dummy").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
}

public void testResolveSimilaritiesFromMapping_classicIsForbidden() {
Settings indexSettings = Settings.builder()
.put("index.similarity.my_similarity.type", "classic")
.put("index.similarity.my_similarity.discount_overlaps", false)
Expand Down Expand Up @@ -114,12 +137,9 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
.put("index.similarity.my_similarity.discount_overlaps", false)
.build();
MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService();
assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class));
assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(BM25Similarity.class));

LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fieldType("field1")
.getTextSearchInfo()
.getSimilarity()
.get();
BM25Similarity similarity = (BM25Similarity) mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get();
assertThat(similarity.getK1(), equalTo(2.0f));
assertThat(similarity.getB(), equalTo(0.5f));
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
Expand Down
Loading