diff --git a/CHANGELOG-3.0.md b/CHANGELOG-3.0.md index ab3f7c502332b..38aa951d4a504 100644 --- a/CHANGELOG-3.0.md +++ b/CHANGELOG-3.0.md @@ -35,6 +35,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Refactor the codebase to eliminate top level split packages for JPMS support ([#17153](https://github.com/opensearch-project/OpenSearch/pull/17153) - Refactor `:server` module `org.apacge.lucene` package to eliminate top level split packages for JPMS support ([#17241](https://github.com/opensearch-project/OpenSearch/pull/17241)) - Refactor the `:server` module `org.opensearch.client` to `org.opensearch.transport.client` to eliminate top level split packages for JPMS support ([#17272](https://github.com/opensearch-project/OpenSearch/pull/17272)) +- Use Lucene `BM25Similarity` as default since the `LegacyBM25Similarity` is marked as deprecated ([#17306](https://github.com/opensearch-project/OpenSearch/pull/17306)) ### Deprecated diff --git a/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml index 5ba4077beac46..4e3d079d648ef 100644 --- a/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml +++ b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml @@ -79,11 +79,11 @@ teardown: - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "3" } - - match: { hits.hits.0.inner_hits.question.hits.total.value: 0} - - match: { hits.hits.1._id: "2" } - - match: { hits.hits.1.inner_hits.question.hits.total.value: 1} - - match: { hits.hits.1.inner_hits.question.hits.hits.0._id: "1"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.inner_hits.question.hits.total.value: 1 } + - match: { hits.hits.0.inner_hits.question.hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.inner_hits.question.hits.total.value: 0 } --- "HasParent disallow expensive queries": diff --git a/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/50_legacy_bm25.yml b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/50_legacy_bm25.yml new file mode 100644 index 0000000000000..e5b242685064b --- /dev/null +++ b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/50_legacy_bm25.yml @@ -0,0 +1,50 @@ +--- +setup: + - do: + indices.create: + index: legacy_bm25_test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + similarity: + default: + type: LegacyBM25 + k1: 1.2 + b: 0.75 + mappings: + properties: + content: + type: text + - do: + index: + index: legacy_bm25_test + id: "1" + body: { "content": "This is a test document for legacy BM25 scoring" } + - do: + index: + index: legacy_bm25_test + id: "2" + body: { "content": "legacy legacy legacy scoring" } + - do: + indices.refresh: + index: legacy_bm25_test + +--- +"Legacy BM25 search": + - do: + search: + index: legacy_bm25_test + body: + query: + match: + content: "legacy" + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "1" } + +--- +teardown: + - do: + indices.delete: + index: legacy_bm25_test diff --git a/server/src/internalClusterTest/java/org/opensearch/search/basic/TransportTwoNodesSearchIT.java b/server/src/internalClusterTest/java/org/opensearch/search/basic/TransportTwoNodesSearchIT.java index b9f16a60d68df..cc88d399932c8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/basic/TransportTwoNodesSearchIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/basic/TransportTwoNodesSearchIT.java @@ -73,6 +73,7 @@ import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; import static org.opensearch.transport.client.Requests.createIndexRequest; import static org.opensearch.transport.client.Requests.searchRequest; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.notNullValue; @@ -179,12 +180,12 @@ public void testDfsQueryThenFetch() throws Exception { SearchHit hit = hits[i]; assertThat(hit.getExplanation(), notNullValue()); assertThat(hit.getExplanation().getDetails().length, equalTo(1)); - assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), startsWith("n,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), equalTo(100L)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), startsWith("N,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), equalTo(100L)); + assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDescription(), startsWith("idf")); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDescription(), containsString("freq")); assertThat( "id[" + hit.getId() + "] -> " + hit.getExplanation().toString(), hit.getId(), @@ -221,12 +222,12 @@ public void testDfsQueryThenFetchWithSort() throws Exception { SearchHit hit = hits[i]; assertThat(hit.getExplanation(), notNullValue()); assertThat(hit.getExplanation().getDetails().length, equalTo(1)); - assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), startsWith("n,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), equalTo(100L)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), startsWith("N,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), equalTo(100L)); + assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDescription(), startsWith("idf")); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDescription(), containsString("freq")); assertThat("id[" + hit.getId() + "]", hit.getId(), equalTo(Integer.toString(total + i))); } total += hits.length; diff --git a/server/src/internalClusterTest/java/org/opensearch/search/nested/SimpleNestedIT.java b/server/src/internalClusterTest/java/org/opensearch/search/nested/SimpleNestedIT.java index 9aad23cdb9544..6746a72329232 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/nested/SimpleNestedIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/nested/SimpleNestedIT.java @@ -496,7 +496,7 @@ public void testExplainWithSingleDoc() throws Exception { assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(1L)); Explanation explanation = searchResponse.getHits().getHits()[0].getExplanation(); assertThat(explanation.getValue(), equalTo(searchResponse.getHits().getHits()[0].getScore())); - assertThat(explanation.toString(), startsWith("0.36464313 = Score based on 2 child docs in range from 0 to 1")); + assertThat(explanation.toString(), startsWith("0.16574687 = Score based on 2 child docs in range from 0 to 1")); } public void testSimpleNestedSorting() throws Exception { diff --git a/server/src/main/java/org/opensearch/index/similarity/SimilarityProviders.java b/server/src/main/java/org/opensearch/index/similarity/SimilarityProviders.java index 4fbd717b64496..3465632eee6da 100644 --- a/server/src/main/java/org/opensearch/index/similarity/SimilarityProviders.java +++ b/server/src/main/java/org/opensearch/index/similarity/SimilarityProviders.java @@ -35,6 +35,7 @@ import org.apache.lucene.search.similarities.AfterEffect; import org.apache.lucene.search.similarities.AfterEffectB; import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BasicModel; import org.apache.lucene.search.similarities.BasicModelG; import org.apache.lucene.search.similarities.BasicModelIF; @@ -62,6 +63,7 @@ import org.apache.lucene.search.similarities.NormalizationH2; import org.apache.lucene.search.similarities.NormalizationH3; import org.apache.lucene.search.similarities.NormalizationZ; +import org.apache.lucene.search.similarities.Similarity; import org.opensearch.Version; import org.opensearch.common.logging.DeprecationLogger; import org.opensearch.common.settings.Settings; @@ -271,13 +273,21 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett } } - public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { + public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); float k1 = settings.getAsFloat("k1", 1.2f); float b = settings.getAsFloat("b", 0.75f); boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + return new BM25Similarity(k1, b, discountOverlaps); + } + + public static Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); + float k1 = settings.getAsFloat("k1", 1.2f); + float b = settings.getAsFloat("b", 0.75f); + boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); return new LegacyBM25Similarity(k1, b, discountOverlaps); } diff --git a/server/src/main/java/org/opensearch/index/similarity/SimilarityService.java b/server/src/main/java/org/opensearch/index/similarity/SimilarityService.java index 203068e08c3ce..ba2cf81c9a624 100644 --- a/server/src/main/java/org/opensearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/opensearch/index/similarity/SimilarityService.java @@ -37,6 +37,7 @@ import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; @@ -52,7 +53,6 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; -import org.opensearch.lucene.similarity.LegacyBM25Similarity; import org.opensearch.script.ScriptService; import java.util.Collections; @@ -84,7 +84,7 @@ public final class SimilarityService extends AbstractIndexComponent { }; }); defaults.put("BM25", version -> { - final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); + final Similarity similarity = new BM25Similarity(); return () -> similarity; }); defaults.put("boolean", version -> { @@ -100,6 +100,7 @@ public final class SimilarityService extends AbstractIndexComponent { ); }); builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version)); + builtIn.put("LegacyBM25", (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version)); builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version)); builtIn.put("DFR", (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version)); builtIn.put("IB", (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version)); diff --git a/server/src/test/java/org/opensearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/opensearch/index/similarity/SimilarityServiceTests.java index dbe4b4c7a2c30..7eb1b3b676cf3 100644 --- a/server/src/test/java/org/opensearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/opensearch/index/similarity/SimilarityServiceTests.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.opensearch.Version; @@ -53,6 +54,13 @@ public void testDefaultSimilarity() { Settings settings = Settings.builder().build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); + assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class)); + } + + public void testLegacySimilarity() { + Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25").build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); + SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class)); } diff --git a/server/src/test/java/org/opensearch/index/similarity/SimilarityTests.java b/server/src/test/java/org/opensearch/index/similarity/SimilarityTests.java index 247b31bc0e579..1fafa4739b8b4 100644 --- a/server/src/test/java/org/opensearch/index/similarity/SimilarityTests.java +++ b/server/src/test/java/org/opensearch/index/similarity/SimilarityTests.java @@ -33,6 +33,7 @@ package org.opensearch.index.similarity; import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BasicModelG; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.DFISimilarity; @@ -72,7 +73,7 @@ protected Collection> getPlugins() { public void testResolveDefaultSimilarities() { SimilarityService similarityService = createIndex("foo").similarityService(); - assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class)); + assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); assertThat(similarityService.getSimilarity("default"), equalTo(null)); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic")); @@ -83,7 +84,29 @@ public void testResolveDefaultSimilarities() { ); } - public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException { + public void testResolveLegacySimilarity() throws IOException { + Settings settings = Settings.builder() + .put("index.similarity.my_similarity.type", "LegacyBM25") + .put("index.similarity.my_similarity.k1", 1.2f) + .put("index.similarity.my_similarity.b", 0.75f) + .put("index.similarity.my_similarity.discount_overlaps", false) + .build(); + + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("dummy") + .field("type", "text") + .field("similarity", "my_similarity") + .endObject() + .endObject() + .endObject(); + + MapperService mapperService = createIndex("foo", settings, "type", mapping).mapperService(); + assertThat(mapperService.fieldType("dummy").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class)); + } + + public void testResolveSimilaritiesFromMapping_classicIsForbidden() { Settings indexSettings = Settings.builder() .put("index.similarity.my_similarity.type", "classic") .put("index.similarity.my_similarity.discount_overlaps", false) @@ -114,12 +137,9 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException { .put("index.similarity.my_similarity.discount_overlaps", false) .build(); MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService(); - assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(LegacyBM25Similarity.class)); + assertThat(mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(), instanceOf(BM25Similarity.class)); - LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fieldType("field1") - .getTextSearchInfo() - .getSimilarity() - .get(); + BM25Similarity similarity = (BM25Similarity) mapperService.fieldType("field1").getTextSearchInfo().getSimilarity().get(); assertThat(similarity.getK1(), equalTo(2.0f)); assertThat(similarity.getB(), equalTo(0.5f)); assertThat(similarity.getDiscountOverlaps(), equalTo(false));