public class MultiWordStringMatcher
extends java.lang.Object
| Modifier and Type | Class and Description |
|---|---|
static class |
MultiWordStringMatcher.LongestStringComparator |
static class |
MultiWordStringMatcher.MatchType
if matchType is EXCT: match exact string
if matchType is EXCTWS: match exact string, except whitespace can match multiple whitespaces
if matchType is LWS: match case insensitive string, except whitespace can match multiple whitespaces
if matchType is LNRM: disregards punctuation, does case insensitive match
if matchType is REGEX: interprets string as regex already |
| Modifier and Type | Field and Description |
|---|---|
static java.util.Comparator<java.lang.String> |
LONGEST_STRING_COMPARATOR |
| Constructor and Description |
|---|
MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType) |
MultiWordStringMatcher(java.lang.String matchTypeStr) |
| Modifier and Type | Method and Description |
|---|---|
java.util.regex.Pattern |
createPattern(java.lang.String targetString) |
static java.util.List<IntPair> |
findOffsets(java.util.regex.Pattern pattern,
java.lang.String text)
Finds pattern in text and returns offsets
|
static java.util.List<IntPair> |
findOffsets(java.util.regex.Pattern pattern,
java.lang.String text,
int start,
int end)
Finds pattern in text span from character start to end (exclusive) and returns offsets
|
java.util.List<IntPair> |
findTargetStringOffsets(java.lang.String text,
java.lang.String targetString)
Finds target string in text and returns offsets
(matches based on set matchType)
|
java.util.List<IntPair> |
findTargetStringOffsets(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets
(matches based on set matchType)
|
protected java.util.List<IntPair> |
findTargetStringOffsetsExct(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets
(does EXCT string matching)
|
protected java.util.List<IntPair> |
findTargetStringOffsetsRegex(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text and returns offsets using regular expressions
(matches based on set matchType)
|
java.lang.String |
getExctWsRegex(java.lang.String targetString) |
java.lang.String |
getLnrmRegex(java.lang.String targetString) |
java.lang.String |
getLWsRegex(java.lang.String targetString) |
MultiWordStringMatcher.MatchType |
getMatchType() |
java.util.regex.Pattern |
getPattern(java.lang.String targetString) |
java.util.regex.Pattern |
getPattern(java.lang.String[] targetStrings) |
java.lang.String |
getRegex(java.lang.String targetString) |
java.lang.String |
getRegex(java.lang.String[] targetStrings) |
protected java.lang.String |
markTargetString(java.lang.String text,
java.lang.String targetString,
java.lang.String beginMark,
java.lang.String endMark,
boolean markOnlyIfSpace) |
java.lang.String |
putSpacesAroundTargetString(java.lang.String text,
java.lang.String targetString)
Finds target string in text and puts spaces around it so it will be matched when we match against tokens
|
void |
setMatchType(MultiWordStringMatcher.MatchType matchType) |
public static final java.util.Comparator<java.lang.String> LONGEST_STRING_COMPARATOR
public MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType)
public MultiWordStringMatcher(java.lang.String matchTypeStr)
public MultiWordStringMatcher.MatchType getMatchType()
public void setMatchType(MultiWordStringMatcher.MatchType matchType)
public java.lang.String putSpacesAroundTargetString(java.lang.String text,
java.lang.String targetString)
text - String in which to look for the target string
targetString - Target string to look for
protected java.lang.String markTargetString(java.lang.String text,
java.lang.String targetString,
java.lang.String beginMark,
java.lang.String endMark,
boolean markOnlyIfSpace)
protected java.util.List<IntPair> findTargetStringOffsetsExct(java.lang.String text, java.lang.String targetString, int start, int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search
public java.util.regex.Pattern getPattern(java.lang.String[] targetStrings)
public java.lang.String getRegex(java.lang.String[] targetStrings)
public java.util.regex.Pattern getPattern(java.lang.String targetString)
public java.util.regex.Pattern createPattern(java.lang.String targetString)
public java.lang.String getRegex(java.lang.String targetString)
public java.lang.String getExctWsRegex(java.lang.String targetString)
public java.lang.String getLWsRegex(java.lang.String targetString)
public java.lang.String getLnrmRegex(java.lang.String targetString)
protected java.util.List<IntPair> findTargetStringOffsetsRegex(java.lang.String text, java.lang.String targetString, int start, int end)
text - String in which to find target string
targetString - Target string to look for
start - position to start search
end - position to end search
public static java.util.List<IntPair> findOffsets(java.util.regex.Pattern pattern, java.lang.String text)
pattern - pattern to look for
text - String in which to look for the pattern
public static java.util.List<IntPair> findOffsets(java.util.regex.Pattern pattern, java.lang.String text, int start, int end)
pattern - pattern to look for
text - String in which to look for the pattern
start - position to start search
end - position to end search
public java.util.List<IntPair> findTargetStringOffsets(java.lang.String text, java.lang.String targetString)
text - String in which to look for the target string
targetString - Target string to look for
public java.util.List<IntPair> findTargetStringOffsets(java.lang.String text, java.lang.String targetString, int start, int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search