public class ChineseTreebankLanguagePack extends AbstractTreebankLanguagePack
| Modifier and Type | Field and Description |
|---|---|
| static java.lang.String | ENCODING |
DEFAULT_ENCODING, DEFAULT_GF_CHAR, generateOriginalDependencies, gfCharacter

| Constructor and Description |
|---|
ChineseTreebankLanguagePack() |
| Modifier and Type | Method and Description |
|---|---|
static java.util.function.Predicate<java.lang.String> |
chineseColonAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseCommaAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseDashAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseDouHaoAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseEndSentenceAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseLeftParenthesisAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseLeftQuoteMarkAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseOtherAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseParenthesisAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseQuoteMarkAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseRightParenthesisAcceptFilter() |
static java.util.function.Predicate<java.lang.String> |
chineseRightQuoteMarkAcceptFilter() |
boolean |
generateOriginalDependencies()
Chinese does not support Universal Dependencies yet,
so always return true.
|
java.lang.String |
getEncoding()
Return the input Charset encoding for the Treebank.
|
TokenizerFactory<? extends HasWord> |
getTokenizerFactory()
Return a tokenizer which might be suitable for tokenizing text that
will be used with this Treebank/Language pair, without tokenizing carriage returns (i.e., treating them as white space).
|
GrammaticalStructureFactory |
grammaticalStructureFactory()
Return a GrammaticalStructureFactory suitable for this language/treebank.
|
GrammaticalStructureFactory |
grammaticalStructureFactory(java.util.function.Predicate<java.lang.String> puncFilt)
Return a GrammaticalStructureFactory suitable for this language/treebank.
|
GrammaticalStructureFactory |
grammaticalStructureFactory(java.util.function.Predicate<java.lang.String> puncFilt,
HeadFinder hf)
Return a GrammaticalStructureFactory suitable for this language/treebank.
|
HeadFinder |
headFinder()
The HeadFinder to use for your treebank.
|
boolean |
isEvalBIgnoredPunctuationTag(java.lang.String str)
Accepts a String that is a punctuation
tag that should be ignored by EVALB-style evaluation,
and rejects everything else.
|
boolean |
isPunctuationTag(java.lang.String str)
Accepts a String that is a punctuation
tag name, and rejects everything else.
|
boolean |
isPunctuationWord(java.lang.String str)
Accepts a String that is a punctuation
word, and rejects everything else.
|
boolean |
isSentenceFinalPunctuationTag(java.lang.String str)
Accepts a String that is a sentence end
punctuation tag, and rejects everything else.
|
char[] |
labelAnnotationIntroducingCharacters()
Return an array of characters at which a String should be
truncated to give the basic syntactic category of a label.
|
java.lang.String[] |
punctuationTags()
Returns a String array of punctuation tags for this treebank/language.
|
java.lang.String[] |
punctuationWords()
Returns a String array of punctuation words for this treebank/language.
|
java.lang.String[] |
sentenceFinalPunctuationTags()
Returns a String array of sentence final punctuation tags for this
treebank/language.
|
java.lang.String[] |
sentenceFinalPunctuationWords()
Returns a String array of sentence final punctuation words for this
treebank/language.
|
void |
setTokenizerFactory(TokenizerFactory<? extends HasWord> tf) |
java.lang.String[] |
startSymbols()
Returns a String array of treebank start symbols.
|
boolean |
supportsGrammaticalStructures()
Whether or not we have typed dependencies for this language.
|
java.lang.String |
treebankFileExtension()
Returns the extension of treebank files for this treebank.
|
TreeReaderFactory |
treeReaderFactory()
Returns a TreeReaderFactory suitable for general purpose use
with this language/treebank.
|
HeadFinder |
typedDependencyHeadFinder()
The HeadFinder to use when making typed dependencies.
|
basicCategory, categoryAndFunction, evalBIgnoredPunctuationTagAcceptFilter, evalBIgnoredPunctuationTagRejectFilter, evalBIgnoredPunctuationTags, getBasicCategoryFunction, getCategoryAndFunctionFunction, getGfCharacter, isLabelAnnotationIntroducingCharacter, isStartSymbol, morphFeatureSpec, punctuationTagAcceptFilter, punctuationTagRejectFilter, punctuationWordAcceptFilter, punctuationWordRejectFilter, sentenceFinalPunctuationTagAcceptFilter, setGenerateOriginalDependencies, setGfCharacter, startSymbol, startSymbolAcceptFilter, stripGF, treeTokenizerFactory

public static final java.lang.String ENCODING
public void setTokenizerFactory(TokenizerFactory<? extends HasWord> tf)
public TokenizerFactory<? extends HasWord> getTokenizerFactory()
AbstractTreebankLanguagePack
WhitespaceTokenizer.
getTokenizerFactory in interface TreebankLanguagePack
getTokenizerFactory in class AbstractTreebankLanguagePack

public java.lang.String getEncoding()
Charset class.
getEncoding in interface TreebankLanguagePack
getEncoding in class AbstractTreebankLanguagePack

public boolean isPunctuationTag(java.lang.String str)
isPunctuationTag in interface TreebankLanguagePack
isPunctuationTag in class AbstractTreebankLanguagePack
str - The string to check

public boolean isPunctuationWord(java.lang.String str)
isPunctuationWord in interface TreebankLanguagePack
isPunctuationWord in class AbstractTreebankLanguagePack
str - The string to check

public boolean isSentenceFinalPunctuationTag(java.lang.String str)
isSentenceFinalPunctuationTag in interface TreebankLanguagePack
isSentenceFinalPunctuationTag in class AbstractTreebankLanguagePack
str - The string to check

public java.lang.String[] punctuationTags()
punctuationTags in interface TreebankLanguagePack
punctuationTags in class AbstractTreebankLanguagePack

public java.lang.String[] punctuationWords()
punctuationWords in interface TreebankLanguagePack
punctuationWords in class AbstractTreebankLanguagePack

public java.lang.String[] sentenceFinalPunctuationTags()
sentenceFinalPunctuationTags in interface TreebankLanguagePack
sentenceFinalPunctuationTags in class AbstractTreebankLanguagePack

public java.lang.String[] sentenceFinalPunctuationWords()
public boolean isEvalBIgnoredPunctuationTag(java.lang.String str)
isEvalBIgnoredPunctuationTag in interface TreebankLanguagePack
isEvalBIgnoredPunctuationTag in class AbstractTreebankLanguagePack
str - The string to check

public char[] labelAnnotationIntroducingCharacters()
labelAnnotationIntroducingCharacters in interface TreebankLanguagePack
labelAnnotationIntroducingCharacters in class AbstractTreebankLanguagePack

public java.lang.String[] startSymbols()
startSymbols in interface TreebankLanguagePack
startSymbols in class AbstractTreebankLanguagePack

public static java.util.function.Predicate<java.lang.String> chineseCommaAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseEndSentenceAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseDouHaoAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseQuoteMarkAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseParenthesisAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseColonAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseDashAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseOtherAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseLeftParenthesisAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseRightParenthesisAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseLeftQuoteMarkAcceptFilter()
public static java.util.function.Predicate<java.lang.String> chineseRightQuoteMarkAcceptFilter()
public java.lang.String treebankFileExtension()
public GrammaticalStructureFactory grammaticalStructureFactory()
AbstractTreebankLanguagePack
grammaticalStructureFactory in interface TreebankLanguagePack
grammaticalStructureFactory in class AbstractTreebankLanguagePack

public GrammaticalStructureFactory grammaticalStructureFactory(java.util.function.Predicate<java.lang.String> puncFilt)
AbstractTreebankLanguagePack
grammaticalStructureFactory in interface TreebankLanguagePack
grammaticalStructureFactory in class AbstractTreebankLanguagePack
puncFilt - A filter which should reject punctuation words (as Strings)

public GrammaticalStructureFactory grammaticalStructureFactory(java.util.function.Predicate<java.lang.String> puncFilt, HeadFinder hf)
AbstractTreebankLanguagePack
grammaticalStructureFactory in interface TreebankLanguagePack
grammaticalStructureFactory in class AbstractTreebankLanguagePack
puncFilt - A filter which should reject punctuation words (as Strings)
hf - A HeadFinder which finds heads for typed dependencies

public boolean supportsGrammaticalStructures()
TreebankLanguagePack
supportsGrammaticalStructures in interface TreebankLanguagePack
supportsGrammaticalStructures in class AbstractTreebankLanguagePack

public TreeReaderFactory treeReaderFactory()
AbstractTreebankLanguagePack
treeReaderFactory in interface TreebankLanguagePack
treeReaderFactory in class AbstractTreebankLanguagePack

public HeadFinder headFinder()
public HeadFinder typedDependencyHeadFinder()
public boolean generateOriginalDependencies()
generateOriginalDependencies in interface TreebankLanguagePack
generateOriginalDependencies in class AbstractTreebankLanguagePack