Add smoothing constant to IDF formula in BM25 to prevent negative scores (#5696)

Co-authored-by: prayas7102 <prayas.prithvirajpratap7@example.com>
Co-authored-by: Alex Klymenko <alexanderklmn@gmail.com>
This commit is contained in:
Prayas Kumar
2024-10-10 13:09:22 +05:30
committed by GitHub
parent d4fff30eaa
commit 90d20b3a43
2 changed files with 8 additions and 6 deletions

View File

@@ -50,13 +50,15 @@ class BM25InvertedIndexTest {
// Perform search for the term "good"
List<SearchResult> results = index.search("good");
assertFalse(results.isEmpty());
for (SearchResult result : results) {
System.out.println(result);
}
// Validate the ranking based on the provided relevance scores
assertEquals(6, results.get(0).getDocId()); // It's a Wonderful Life should be ranked 1st
assertEquals(7, results.get(1).getDocId()); // The Pursuit of Happyness should be ranked 2nd
assertEquals(1, results.get(0).getDocId()); // The Shawshank Redemption should be ranked 1st
assertEquals(8, results.get(1).getDocId()); // A Few Good Men should be ranked 2nd
assertEquals(5, results.get(2).getDocId()); // Good Will Hunting should be ranked 3rd
assertEquals(8, results.get(3).getDocId()); // A Few Good Men should be ranked 4th
assertEquals(1, results.get(4).getDocId()); // The Shawshank Redemption should be ranked 5th
assertEquals(7, results.get(3).getDocId()); // The Pursuit of Happyness should be ranked 4th
assertEquals(6, results.get(4).getDocId()); // It's a Wonderful Life should be ranked 5th
// Ensure the relevance scores are in descending order
for (int i = 0; i < results.size() - 1; i++) {