public class SegmentedDocument
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
private int |
currentSentenceIdx
Current sentence index
|
private IDocumentFactory |
documentFactory |
protected boolean |
isSegmented
Whether the document has been segmented
|
private java.util.List<SentenceSegment> |
sentences
Sentences
|
protected java.lang.String |
text
Original text
|
protected java.net.URL |
url
Document URL
|
private java.util.List<WordSegment> |
words
Words
|
Modifier | Constructor and Description |
---|---|
protected |
SegmentedDocument(IDocumentFactory documentFactory,
java.net.URL url,
java.lang.String text)
Constructor
|
Modifier and Type | Method and Description |
---|---|
int |
getCurrentSentenceIdx() |
SentenceSegment |
getCurrentSentenceSegment() |
java.lang.String |
getPath()
Get document path
|
int |
getSentenceCount() |
java.util.List<SentenceSegment> |
getSentences() |
SentenceSegment |
getSentenceSegment(int sentenceIndex)
Get the sentence segment at the given index
|
java.lang.String |
getString(Segment segment)
Get segment string
|
java.lang.String |
getText() |
java.util.List<WordSegment> |
getWords() |
static Segment |
merge(java.util.List<Segment> segments)
Merge segments to segment
|
static Segment |
merge(Segment... segments)
Merge segments to segment
|
int |
nextSentence() |
int |
previousSentence() |
void |
segment()
Segment
|
static java.lang.String |
sentenceToString(java.util.List<Segment> sentence,
int lineLen)
Convert sentence to string
|
void |
setSentenceIdx(int sentenceIdx) |
java.util.List<Segment> |
split(Segment segment)
Split segment into word segments
|
java.util.List<Segment> |
split(Segment leftSegment,
Segment rightSegment)
Make intermediate segment list
|
java.lang.String |
toString() |
private final IDocumentFactory documentFactory
private java.util.List<WordSegment> words
private java.util.List<SentenceSegment> sentences
protected final java.net.URL url
protected java.lang.String text
protected boolean isSegmented
private int currentSentenceIdx
protected SegmentedDocument(IDocumentFactory documentFactory, java.net.URL url, java.lang.String text)
documentFactory
- document factory
url
- document URL
text
- document text
public void segment()
public java.lang.String getText()
public int getSentenceCount()
public java.util.List<WordSegment> getWords()
public java.util.List<SentenceSegment> getSentences()
public java.util.List<Segment> split(Segment leftSegment, Segment rightSegment)
leftSegment
- start segment
rightSegment
- finish segment
public java.util.List<Segment> split(Segment segment)
segment
- segment
public static Segment merge(java.util.List<Segment> segments)
segments
- segments
public static Segment merge(Segment... segments)
segments
- segments
public java.lang.String getPath()
public java.lang.String getString(Segment segment)
segment
- segment
public static java.lang.String sentenceToString(java.util.List<Segment> sentence, int lineLen)
sentence
- sentence
lineLen
- line length to wrap at
public SentenceSegment getSentenceSegment(int sentenceIndex)
sentenceIndex
- sentence index
public int nextSentence()
public int previousSentence()
public void setSentenceIdx(int sentenceIdx)
public int getCurrentSentenceIdx()
public SentenceSegment getCurrentSentenceSegment()
public java.lang.String toString()
Overrides: toString in class java.lang.Object