public class TrainOptions
extends java.lang.Object
implements java.io.Serializable
| Modifier and Type | Class and Description | 
|---|---|
| static class  | TrainOptions.TransformMatrixType | 
| Modifier and Type | Field and Description | 
|---|---|
| boolean | basicCategoryTagsInDependencyGrammar — Whether to use the basic or split tags in the dependency grammar | 
| int | batchSize | 
| boolean | cheatPCFGAdd all test set trees to training data for PCFG. | 
| boolean | collinsPuncPromote/delete punctuation like Collins. | 
| int | compactGrammarHow to compact grammars as FSMs. | 
| int | debugOutputFrequencyIf larger than 0, the parser may choose to output debug information
 every X seconds, X iterations, or some other similar metric | 
| static int | DEFAULT_BATCH_SIZEWhen training using batches of trees, such as in the DVParser,
 how many trees to use in one batch | 
| static double | DEFAULT_DELTA_MARGIN | 
| static int | DEFAULT_K_BESTWhen training the DV parsing method, how many of the top K trees
 to analyze from the underlying parser | 
| static double | DEFAULT_LEARNING_RATE | 
| static int | DEFAULT_QN_ITERATIONS_PER_BATCHWhen training the DV parsing method, how many iterations to loop
 for one batch of trees | 
| static double | DEFAULT_REGCOST — Regularization constant | 
| static double | DEFAULT_SCALING_FOR_INIT | 
| static int | DEFAULT_STALLED_ITERATION_LIMIT | 
| static int | DEFAULT_TRAINING_ITERATIONSWhen training a parsing method where the training has a (max)
 number of iterations, how many iterations to loop | 
| static java.lang.String | DEFAULT_UNK_WORD | 
| java.util.Set<java.lang.String> | deleteSplitters | 
| double | deltaMarginHow much to penalize the wrong trees for how different they are
 from the gold tree when training | 
| int | dvKBest | 
| boolean | dvSimplifiedModelMake the dv model as simple as possible | 
| double | fractionBeforeUnseenCountingStart to aggregate signature-tag pairs only for words unseen in the first
 this fraction of the data. | 
| boolean | gPA — This variable controls doing 2 levels of parent annotation. | 
| int | HSEL_CUT | 
| boolean | hSelSplit | 
| double | learningRateHow fast to learn (can mean different things for different algorithms) | 
| boolean | leftRec — Left edge is left-recursive (X << X). Bad. | 
| boolean | leftToRight | 
| boolean | lowercaseWordVectorsWhether or not to lowercase word vectors | 
| boolean | markFinalStatesWhether or not to mark final states in binarized grammar. | 
| boolean | markovFactorWhether to do "horizontal Markovization" (as in ACL 2003 paper). | 
| int | markovOrder | 
| boolean | markStrahler — Horton-Strahler number/dimension (Maximilian Schlund) | 
| int | markUnaryMark all unary nodes specially. | 
| boolean | markUnaryTagsMark POS tags which are the sole member of their phrasal constituent. | 
| int | maxTrainTimeSeconds | 
| boolean | noRebinarizationWhen binarizing trees, don't binarize trees with two children. | 
| boolean | noTagSplit | 
| int | openClassTypesThresholdA POS tag has to have been attributed to more than this number of word
 types before it is regarded as an open-class tag. | 
| boolean | PA — This variable controls doing parent annotation of phrasal nodes. | 
| boolean | postGPA | 
| boolean | postPA | 
| java.util.Set | postSplitters | 
| boolean | postSplitWithBaseCategoryWhether, in post-splitting of categories, nodes are annotated with the
  (grand)parent's base category or with its complete subcategorized
  category. | 
| boolean | predictSplitsUse the method reported by Berkeley for splitting and recombining
 states. | 
| TreeTransformer | preTransformerA transformer to use on the training data before any other
 processing step. | 
| java.io.PrintWriter | printAnnotatedPW | 
| boolean | printAnnotatedRuleCounts | 
| boolean | printAnnotatedStateCounts | 
| java.io.PrintWriter | printBinarizedPW | 
| boolean | printStates | 
| int | printTreeTransformationsJust for debugging: check that your tree transforms work correctly. | 
| int | qnEstimatesWhen training the DV parsing method, how many estimates to keep
 for the qn approximation. | 
| int | qnIterationsPerBatch | 
| double | qnToleranceWhen training the DV parsing method, the tolerance to use if we
 want to stop qn early | 
| long | randomSeed | 
| double | regCost | 
| boolean | rightRecRight edge is right-recursive (X << X) Bad. | 
| double | ruleDiscountDiscounts the count of BinaryRule's (only, apparently) in training data. | 
| boolean | ruleSmoothingEnables linear rule smoothing during grammar extraction
 but before grammar compaction. | 
| double | ruleSmoothingAlpha | 
| double | scalingForInitHow much to scale certain parameters when initializing models. | 
| boolean | selectivePostSplit | 
| double | selectivePostSplitCutOff | 
| boolean | selectiveSplit — Only split the "common high KL divergence" parent categories... | 
| double | selectiveSplitCutOff | 
| boolean | simpleBinarizedLabelsWhen binarizing trees, don't annotate the labels with anything | 
| boolean | sisterAnnotateSelective Sister annotation. | 
| java.util.Set<java.lang.String> | sisterSplitters | 
| boolean | smoothing — TODO wsg2011: This is the old grammar smoothing parameter that no
 longer does anything in the parser. | 
| int | splitCountIf we are predicting splits, we loop this many times | 
| boolean | splitPrePreTMark all pre-preterminals (also does splitBaseNP: don't need both) | 
| double | splitRecombineRateIf we are predicting splits, we recombine states at this rate every loop | 
| java.util.Set<java.lang.String> | splittersSet the splitter strings. | 
| int | stalledIterationLimitHow many iterations to allow training to stall before taking the
 best model, if training in an iterative manner | 
| java.lang.String | taggedFilesA set of files to use as extra information in the lexicon. | 
| boolean | tagPAParent annotation on tags. | 
| boolean | tagSelectivePostSplit | 
| double | tagSelectivePostSplitCutOff | 
| boolean | tagSelectiveSplitDo parent annotation on tags selectively. | 
| double | tagSelectiveSplitCutOff | 
| int | trainingIterations | 
| int | trainingThreadsIf the training algorithm allows for parallelization, how many
 threads to use | 
| int | trainLengthLimit | 
| java.lang.String | trainTreeFile | 
| boolean | trainWordVectors — Do we want a model that uses word vectors (such as the DVParser)
 to train those word vectors when training the model?
  Note: models prior to 2014-02-13 may have incorrect values in this field, as it was originally a compile-time constant. | 
| TrainOptions.TransformMatrixType | transformMatrixType | 
| boolean | unknownCapsVectorWhether or not to build an unknown word vector for words with caps in them | 
| boolean | unknownChineseNumberVectorWhether or not to build an unknown word vector to match Chinese numbers | 
| boolean | unknownChinesePercentVectorWhether or not to build an unknown word vector to match Chinese percentages | 
| boolean | unknownChineseYearVectorWhether or not to build an unknown word vector to match Chinese years | 
| boolean | unknownDashedWordVectorsWhether or not to handle unknown dashed words by taking the last part | 
| boolean | unknownNumberVectorWhether or not to build an unknown word vector specifically for numbers | 
| java.lang.String | unkWordSome models will use external data sources which contain
 information about unknown words. | 
| boolean | useContextWordsSpecifically for the DVModel, uses words on either side of a
 context when combining constituents. | 
| Constructor and Description | 
|---|
| TrainOptions() | 
| Modifier and Type | Method and Description | 
|---|---|
| int | compactGrammar() | 
| void | display() | 
| boolean | outsideFactor() — If true, declare early -- leave this on except maybe with markov on. | 
| static void | printTrainTree(java.io.PrintWriter pw,
              java.lang.String message,
              Tree t) | 
| java.lang.String | toString() | 
public java.lang.String trainTreeFile
public int trainLengthLimit
public boolean cheatPCFG
public boolean markovFactor
public int markovOrder
public boolean hSelSplit
public int HSEL_CUT
public boolean markFinalStates
public int openClassTypesThreshold
public double fractionBeforeUnseenCounting
public boolean PA
public boolean gPA
public boolean postPA
public boolean postGPA
public boolean selectiveSplit
public double selectiveSplitCutOff
public boolean selectivePostSplit
public double selectivePostSplitCutOff
public boolean postSplitWithBaseCategory
public boolean sisterAnnotate
public java.util.Set<java.lang.String> sisterSplitters
public int markUnary
public boolean markUnaryTags
public boolean splitPrePreT
public boolean tagPA
public boolean tagSelectiveSplit
public double tagSelectiveSplitCutOff
public boolean tagSelectivePostSplit
public double tagSelectivePostSplitCutOff
public boolean rightRec
public boolean leftRec
public boolean collinsPunc
public java.util.Set<java.lang.String> splitters
public java.util.Set postSplitters
public java.util.Set<java.lang.String> deleteSplitters
public int printTreeTransformations
public java.io.PrintWriter printAnnotatedPW
public java.io.PrintWriter printBinarizedPW
public boolean printStates
public int compactGrammar
public boolean leftToRight
public boolean noTagSplit
public boolean ruleSmoothing
public double ruleSmoothingAlpha
public boolean smoothing
public double ruleDiscount
public boolean printAnnotatedRuleCounts
public boolean printAnnotatedStateCounts
public boolean basicCategoryTagsInDependencyGrammar
public TreeTransformer preTransformer
public java.lang.String taggedFiles
public boolean predictSplits
public int splitCount
public double splitRecombineRate
public boolean simpleBinarizedLabels
public boolean noRebinarization
public int trainingThreads
public static final int DEFAULT_K_BEST
public int dvKBest
public static final int DEFAULT_TRAINING_ITERATIONS
public int trainingIterations
public static final int DEFAULT_BATCH_SIZE
public int batchSize
public static final double DEFAULT_REGCOST
public double regCost
public static final int DEFAULT_QN_ITERATIONS_PER_BATCH
public int qnIterationsPerBatch
public int qnEstimates
public double qnTolerance
public int debugOutputFrequency
public long randomSeed
public static final double DEFAULT_LEARNING_RATE
public double learningRate
public static final double DEFAULT_DELTA_MARGIN
public double deltaMargin
public boolean unknownNumberVector
public boolean unknownDashedWordVectors
public boolean unknownCapsVector
public boolean dvSimplifiedModel
public boolean unknownChineseYearVector
public boolean unknownChineseNumberVector
public boolean unknownChinesePercentVector
public static final double DEFAULT_SCALING_FOR_INIT
public double scalingForInit
public int maxTrainTimeSeconds
public static final java.lang.String DEFAULT_UNK_WORD
public java.lang.String unkWord
public boolean lowercaseWordVectors
public TrainOptions.TransformMatrixType transformMatrixType
public boolean useContextWords
public boolean trainWordVectors
public static final int DEFAULT_STALLED_ITERATION_LIMIT
public int stalledIterationLimit
public boolean markStrahler
public boolean outsideFactor()
public int compactGrammar()
public void display()
public java.lang.String toString()
Overrides: toString in class java.lang.Object
public static void printTrainTree(java.io.PrintWriter pw,
                                  java.lang.String message,
                                  Tree t)