public class Segmenter extends java.lang.Object implements ISegmenter
Modifier and Type | Field and Description |
---|---|
private edu.stanford.nlp.process.TokenizerFactory<edu.stanford.nlp.ling.CoreLabel> | tokenizerFactory |
Constructor and Description |
---|
Segmenter(Language language) — Constructor |
Modifier and Type | Method and Description |
---|---|
private static java.util.List<java.util.List<edu.stanford.nlp.ling.CoreLabel>> | getSentences(java.util.List<edu.stanford.nlp.ling.CoreLabel> words) — Segment words into sentences |
java.util.List<java.util.List<edu.stanford.nlp.ling.CoreLabel>> | getSentenceWords(java.lang.String text) — Get sentences from text |
java.util.List<java.util.List<edu.stanford.nlp.ling.CoreLabel>> | getSentenceWords(java.net.URL url) — Get document from URL |
java.lang.String | getTokenizeModel() — Get tokenize model |
private java.util.List<edu.stanford.nlp.ling.CoreLabel> | getTokens(java.lang.String text) — Get list of tokens from text |
private java.util.List<edu.stanford.nlp.ling.CoreLabel> | getTokens(java.net.URL url) — Get list of tokens from URL |
private final edu.stanford.nlp.process.TokenizerFactory<edu.stanford.nlp.ling.CoreLabel> tokenizerFactory
public Segmenter(Language language)
language - language
private java.util.List<edu.stanford.nlp.ling.CoreLabel> getTokens(java.lang.String text)
text - text
private java.util.List<edu.stanford.nlp.ling.CoreLabel> getTokens(java.net.URL url) throws java.io.IOException
url - document URL
java.io.IOException - exception
private static java.util.List<java.util.List<edu.stanford.nlp.ling.CoreLabel>> getSentences(java.util.List<edu.stanford.nlp.ling.CoreLabel> words)
words - words
public java.util.List<java.util.List<edu.stanford.nlp.ling.CoreLabel>> getSentenceWords(java.lang.String text)
ISegmenter
Specified by: getSentenceWords in interface ISegmenter
text - text to segment
public java.util.List<java.util.List<edu.stanford.nlp.ling.CoreLabel>> getSentenceWords(java.net.URL url) throws java.io.IOException
ISegmenter
Specified by: getSentenceWords in interface ISegmenter
url - URL
java.io.IOException - exception
public java.lang.String getTokenizeModel()
ISegmenter
Specified by: getTokenizeModel in interface ISegmenter