public class HtmlCleaner extends Object
It represents the public interface to the user. Its task is to call the tokenizer with the specified source HTML, traverse the list of produced tokens and create the internal object model. It also offers a set of methods to write the resulting XML to a string, file or any output stream.
Typical usage is the following:
| Modifier and Type | Field and Description |
|---|---|
| static String | DEFAULT_CHARSET |
| Constructor and Description |
|---|
HtmlCleaner(File file)
Constructor - creates the instance for the specified file.
|
HtmlCleaner(File file,
ITagInfoProvider tagInfoProvider)
Constructor - creates the instance for the specified file and tag info provider.
|
HtmlCleaner(File file,
String charset)
Constructor - creates the instance for the specified file and charset.
|
HtmlCleaner(File file,
String charset,
ITagInfoProvider tagInfoProvider)
Constructor - creates the instance for the specified file, charset and tag info provider.
|
HtmlCleaner(InputStream in)
Constructor - creates the instance for the specified input stream.
|
HtmlCleaner(InputStream in,
ITagInfoProvider tagInfoProvider)
Constructor - creates the instance for the specified input stream and tag info provider.
|
HtmlCleaner(InputStream in,
String charset)
Constructor - creates the instance for the specified input stream and the
charset.
|
HtmlCleaner(String htmlContent)
Constructor - creates the instance with specified html content as String.
|
HtmlCleaner(String htmlContent,
ITagInfoProvider tagInfoProvider)
Constructor - creates the instance with specified html content as String.
|
HtmlCleaner(URL url)
Constructor - creates the instance for the specified URL.
|
HtmlCleaner(URL url,
ITagInfoProvider tagInfoProvider)
Constructor - creates the instance for the specified URL and tag info provider.
|
HtmlCleaner(URL url,
String charset)
Constructor - creates the instance for specified URL and charset.
|
HtmlCleaner(URL url,
String charset,
ITagInfoProvider tagInfoProvider)
Constructor - creates the instance for specified URL and charset.
|
public static final String DEFAULT_CHARSET
public HtmlCleaner(String htmlContent, ITagInfoProvider tagInfoProvider)
Parameters: htmlContent
public HtmlCleaner(String htmlContent)
Parameters: htmlContent
public HtmlCleaner(File file, String charset, ITagInfoProvider tagInfoProvider) throws IOException
Parameters: file, charset
Throws: IOException
public HtmlCleaner(File file, String charset) throws IOException
Parameters: file, charset
Throws: IOException
public HtmlCleaner(File file, ITagInfoProvider tagInfoProvider) throws IOException
Parameters: file
Throws: IOException
public HtmlCleaner(File file) throws IOException
Parameters: file
Throws: IOException
public HtmlCleaner(URL url, String charset, ITagInfoProvider tagInfoProvider) throws IOException
Parameters: url, charset
Throws: IOException
public HtmlCleaner(URL url, ITagInfoProvider tagInfoProvider) throws IOException
Parameters: url, tagInfoProvider
Throws: IOException
public HtmlCleaner(URL url, String charset) throws IOException
Parameters: url, charset
Throws: IOException
public HtmlCleaner(URL url) throws IOException
Parameters: url
Throws: IOException
public HtmlCleaner(InputStream in, ITagInfoProvider tagInfoProvider)
Parameters: in, tagInfoProvider
public HtmlCleaner(InputStream in)
Parameters: in
public HtmlCleaner(InputStream in, String charset) throws IOException
Parameters: in, charset
Throws: IOException
public void clean() throws IOException
Throws: IOException
public List<BaseToken> getNodeList() throws IOException
Throws: IOException
public boolean isOmitUnknownTags()
public void setOmitUnknownTags(boolean omitUnknownTags)
public boolean isOmitDeprecatedTags()
public void setOmitDeprecatedTags(boolean omitDeprecatedTags)
public boolean isAdvancedXmlEscape()
public void setAdvancedXmlEscape(boolean advancedXmlEscape)
public boolean isUseCdataForScriptAndStyle()
public void setUseCdataForScriptAndStyle(boolean useCdataForScriptAndStyle)
public boolean isTranslateSpecialEntities()
public void setTranslateSpecialEntities(boolean translateSpecialEntities)
public boolean isRecognizeUnicodeChars()
public void setRecognizeUnicodeChars(boolean recognizeUnicodeChars)
public boolean isOmitComments()
public void setOmitComments(boolean omitComments)
public boolean isOmitXmlDeclaration()
public void setOmitXmlDeclaration(boolean omitXmlDeclaration)
public boolean isOmitDoctypeDeclaration()
public void setOmitDoctypeDeclaration(boolean omitDoctypeDeclaration)
public boolean isOmitXmlnsAttributes()
public void setOmitXmlnsAttributes(boolean omitXmlnsAttributes)
public String getHyphenReplacementInComment()
public void setHyphenReplacementInComment(String hyphenReplacementInComment)
public void writeXml(XmlSerializer xmlSerializer) throws IOException
Parameters: xmlSerializer
Throws: IOException
public void writeXmlToStream(OutputStream out) throws IOException
Throws: IOException
public void writeXmlToStream(OutputStream out, String charset) throws IOException
Throws: IOException
public void writeCompactXmlToStream(OutputStream out) throws IOException
Throws: IOException
public void writeCompactXmlToStream(OutputStream out, String charset) throws IOException
Throws: IOException
public void writePrettyXmlToStream(OutputStream out) throws IOException
Throws: IOException
public void writePrettyXmlToStream(OutputStream out, String charset) throws IOException
Throws: IOException
public void writeXmlToFile(String fileName) throws IOException
Throws: IOException
public void writeXmlToFile(String fileName, String charset) throws IOException
Throws: IOException
public void writeCompactXmlToFile(String fileName) throws IOException
Throws: IOException
public void writeCompactXmlToFile(String fileName, String charset) throws IOException
Throws: IOException
public void writePrettyXmlToFile(String fileName) throws IOException
Throws: IOException
public void writePrettyXmlToFile(String fileName, String charset) throws IOException
Throws: IOException
public String getXmlAsString() throws IOException
Throws: IOException
public String getCompactXmlAsString() throws IOException
Throws: IOException
public String getPrettyXmlAsString() throws IOException
Throws: IOException
public TagNode getBodyNode()
public void setBodyNode(TagNode bodyNode)
Copyright © 2017 Java Wikipedia API (Bliki engine). All rights reserved.