mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(keyword-extraction) Soften constraints on keyword patterns, allowing for longer segmented words
This commit is contained in:
parent
7c90b6b414
commit
26c20e18ac
@ -152,7 +152,10 @@ public class DocumentPositionMapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean matchesWordPattern(String s) {
|
boolean matchesWordPattern(String s) {
|
||||||
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,4}
|
if (s.length() > 48)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,7}
|
||||||
|
|
||||||
String wordPartSeparator = ".-_/:+*";
|
String wordPartSeparator = ".-_/:+*";
|
||||||
|
|
||||||
@ -169,7 +172,7 @@ public class DocumentPositionMapper {
|
|||||||
if (i == 0)
|
if (i == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for (int j = 0; j < 5; j++) {
|
for (int j = 0; j < 8; j++) {
|
||||||
if (i == s.length()) return true;
|
if (i == s.length()) return true;
|
||||||
|
|
||||||
if (wordPartSeparator.indexOf(s.charAt(i)) < 0) {
|
if (wordPartSeparator.indexOf(s.charAt(i)) < 0) {
|
||||||
|
@ -30,9 +30,11 @@ class DocumentPositionMapperTest {
|
|||||||
Assertions.assertFalse(positionMapper.matchesWordPattern("1234567890abcdef"));
|
Assertions.assertFalse(positionMapper.matchesWordPattern("1234567890abcdef"));
|
||||||
|
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("test-test-test-test-test"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("test-test-test-test-test"));
|
||||||
Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test"));
|
Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test-test-test-test"));
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("192.168.1.100/24"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("192.168.1.100/24"));
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector"));
|
||||||
|
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector::push_back"));
|
||||||
|
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("c++"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("c++"));
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("m*a*s*h"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("m*a*s*h"));
|
||||||
Assertions.assertFalse(positionMapper.matchesWordPattern("Stulpnagelstrasse"));
|
Assertions.assertFalse(positionMapper.matchesWordPattern("Stulpnagelstrasse"));
|
||||||
|
Loading…
Reference in New Issue
Block a user