(keyword-extraction) Soften constraints on keyword patterns, allowing for longer segmented words

This commit is contained in:
Viktor Lofgren 2025-01-07 15:20:50 +01:00
parent 7c90b6b414
commit 26c20e18ac
2 changed files with 8 additions and 3 deletions

View File

@ -152,7 +152,10 @@ public class DocumentPositionMapper {
} }
boolean matchesWordPattern(String s) { boolean matchesWordPattern(String s) {
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,4} if (s.length() > 48)
return false;
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,7}
String wordPartSeparator = ".-_/:+*"; String wordPartSeparator = ".-_/:+*";
@ -169,7 +172,7 @@ public class DocumentPositionMapper {
if (i == 0) if (i == 0)
return false; return false;
for (int j = 0; j < 5; j++) { for (int j = 0; j < 8; j++) {
if (i == s.length()) return true; if (i == s.length()) return true;
if (wordPartSeparator.indexOf(s.charAt(i)) < 0) { if (wordPartSeparator.indexOf(s.charAt(i)) < 0) {

View File

@ -30,9 +30,11 @@ class DocumentPositionMapperTest {
Assertions.assertFalse(positionMapper.matchesWordPattern("1234567890abcdef")); Assertions.assertFalse(positionMapper.matchesWordPattern("1234567890abcdef"));
Assertions.assertTrue(positionMapper.matchesWordPattern("test-test-test-test-test")); Assertions.assertTrue(positionMapper.matchesWordPattern("test-test-test-test-test"));
Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test")); Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test-test-test-test"));
Assertions.assertTrue(positionMapper.matchesWordPattern("192.168.1.100/24")); Assertions.assertTrue(positionMapper.matchesWordPattern("192.168.1.100/24"));
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector")); Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector"));
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector::push_back"));
Assertions.assertTrue(positionMapper.matchesWordPattern("c++")); Assertions.assertTrue(positionMapper.matchesWordPattern("c++"));
Assertions.assertTrue(positionMapper.matchesWordPattern("m*a*s*h")); Assertions.assertTrue(positionMapper.matchesWordPattern("m*a*s*h"));
Assertions.assertFalse(positionMapper.matchesWordPattern("Stulpnagelstrasse")); Assertions.assertFalse(positionMapper.matchesWordPattern("Stulpnagelstrasse"));