public class HTMLParser
extends javax.swing.text.html.HTMLEditorKit.ParserCallback
Call the parse(URL url) method, which will return a string with the contents of an HTML page, without the tags. After calling parse, you can get the HTML title (contents of the TITLE tag) by calling title(). Subclasses may override the handleText(), handleComment(), handleStartTag(), etc. methods so that parse(URL url) returns something other than the text of the web page. (For example, one may be interested in returning only part of the text, or only the links.)

Modifier and Type | Field and Description |
---|---|
protected boolean |
isBody |
protected boolean |
isScript |
protected boolean |
isTitle |
private static int |
SLURP_BUFFER_SIZE |
protected java.lang.StringBuffer |
textBuffer |
protected java.lang.String |
title |
Constructor and Description |
---|
HTMLParser() |
Modifier and Type | Method and Description |
---|---|
static java.lang.String |
escapeString(java.lang.String s,
char[] charsToEscape,
char escapeChar) |
void |
handleEndTag(javax.swing.text.html.HTML.Tag tag,
int pos)
Sets a flag if the end tag is the "TITLE" element end tag.
|
void |
handleStartTag(javax.swing.text.html.HTML.Tag tag,
javax.swing.text.MutableAttributeSet attrSet,
int pos)
Sets a flag if the start tag is the "TITLE" element start tag.
|
void |
handleText(char[] data,
int pos) |
static void |
main(java.lang.String[] args) |
java.lang.String |
parse(java.io.Reader r) |
java.lang.String |
parse(java.lang.String text0)
The parse method that actually does the work.
|
java.lang.String |
parse(java.net.URL url) |
static java.lang.String |
searchAndReplace(java.lang.String text,
java.lang.String from0,
java.lang.String to) |
static java.lang.String |
slurpReader(java.io.Reader reader)
Returns all the text from the given Reader.
|
static java.lang.String |
slurpURL(java.net.URL u)
Returns all the text at the given URL.
|
java.lang.String |
title() |
protected java.lang.StringBuffer textBuffer
protected java.lang.String title
protected boolean isTitle
protected boolean isBody
protected boolean isScript
private static final int SLURP_BUFFER_SIZE
public void handleText(char[] data, int pos)
handleText
in class javax.swing.text.html.HTMLEditorKit.ParserCallback
public void handleStartTag(javax.swing.text.html.HTML.Tag tag, javax.swing.text.MutableAttributeSet attrSet, int pos)
handleStartTag
in class javax.swing.text.html.HTMLEditorKit.ParserCallback
public void handleEndTag(javax.swing.text.html.HTML.Tag tag, int pos)
handleEndTag
in class javax.swing.text.html.HTMLEditorKit.ParserCallback
public java.lang.String parse(java.net.URL url) throws java.io.IOException
java.io.IOException
public java.lang.String parse(java.io.Reader r) throws java.io.IOException
java.io.IOException
public java.lang.String parse(java.lang.String text0) throws java.io.IOException
text0 - input text
java.io.IOException - exception

public java.lang.String title()
public static java.lang.String searchAndReplace(java.lang.String text, java.lang.String from0, java.lang.String to)
public static java.lang.String escapeString(java.lang.String s, char[] charsToEscape, char escapeChar)
public static java.lang.String slurpReader(java.io.Reader reader) throws java.io.IOException
reader - reader
java.io.IOException - exception

public static java.lang.String slurpURL(java.net.URL u) throws java.io.IOException
u - url
java.io.IOException - exception

public static void main(java.lang.String[] args) throws java.io.IOException
java.io.IOException