/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.extractor.util;

import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.lexer.InputStreamSource;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.lexer.Source;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.ParserFeedback;
import org.htmlparser.util.Translate;
import org.htmlparser.visitors.NodeVisitor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class HtmlParserUtil {
    private static final int BUFFER_SIZE = InputStreamSource.BUFFER_SIZE;
    private static final ParserFeedback FEEDBACK_LOGGER = new ParserFeedback(){
        private final Logger logger = LoggerFactory.getLogger(this.getClass());

        public void info(String string) {
            this.logger.info(string);
        }

        public void warning(String string) {
            this.logger.warn(string);
        }

        public void error(String string, ParserException parserException) {
            this.logger.error(string, (Throwable)parserException);
        }
    };

    public static void parse(InputStream inputStream, Charset charset, ContentExtractor contentExtractor) throws ExtractorException {
        String string;
        String string2 = string = charset == null ? "ISO-8859-1" : charset.displayName();
        if (!inputStream.markSupported()) {
            inputStream = new BufferedInputStream(inputStream, BUFFER_SIZE);
        }
        inputStream.mark(BUFFER_SIZE);
        try {
            InputStreamSource inputStreamSource = new InputStreamSource(inputStream, string, BUFFER_SIZE);
            Page page = new Page((Source)inputStreamSource);
            Lexer lexer = new Lexer(page);
            Parser parser = new Parser(lexer, FEEDBACK_LOGGER);
            try {
                try {
                    parser.visitAllNodesWith((NodeVisitor)contentExtractor);
                }
                catch (EncodingChangeException encodingChangeException) {
                    parser.reset();
                    contentExtractor.reset();
                    parser.visitAllNodesWith((NodeVisitor)contentExtractor);
                }
            }
            catch (ParserException parserException) {
                throw new ExtractorException(parserException);
            }
        }
        catch (UnsupportedEncodingException unsupportedEncodingException) {
            throw new ExtractorException(unsupportedEncodingException);
        }
    }

    public static class ContentExtractor
    extends NodeVisitor {
        private static final String XMP = "XMP";
        private static final String PLAINTEXT = "PLAINTEXT";
        private static final String STYLE = "STYLE";
        private static final String SCRIPT = "SCRIPT";
        private static final String TITLE = "TITLE";
        private boolean inTextContext;
        private boolean inTitleContext;
        private boolean decodeText;
        private StringBuilder textBuffer = new StringBuilder(32768);
        private HashSet keywordBuffer = new HashSet();
        private String title;
        private String author;
        private String description;

        public ContentExtractor() {
            this.initFlags();
        }

        private void initFlags() {
            this.inTextContext = true;
            this.inTitleContext = false;
            this.decodeText = true;
        }

        public void reset() {
            this.initFlags();
            this.textBuffer.setLength(0);
            this.keywordBuffer.clear();
            this.title = null;
            this.author = null;
            this.description = null;
        }

        public String getText() {
            return this.textBuffer.toString();
        }

        public Iterator getKeywords() {
            return this.keywordBuffer.iterator();
        }

        public String getTitle() {
            return this.title;
        }

        public String getAuthor() {
            return this.author;
        }

        public String getDescription() {
            return this.description;
        }

        public void visitStringNode(Text text) {
            if (this.inTitleContext) {
                this.title = this.resolveText(text.getText());
                if (this.title != null) {
                    this.title = this.title.trim();
                }
            }
            if (this.inTextContext) {
                String string = text.getText();
                if (this.decodeText) {
                    string = this.resolveText(string);
                }
                this.textBuffer.append(string);
                this.textBuffer.append(' ');
            }
        }

        private String resolveText(String string) {
            string = Translate.decode((String)string);
            string = string.replace('\u00a0', ' ');
            return string;
        }

        public void visitTag(Tag tag) {
            String string = tag.getTagName();
            if (STYLE.equals(string) || SCRIPT.equals(string)) {
                this.inTextContext = false;
            } else {
                this.inTextContext = true;
                this.inTitleContext = TITLE.equals(string);
                if (tag instanceof MetaTag) {
                    MetaTag metaTag = (MetaTag)tag;
                    String string2 = metaTag.getMetaTagName();
                    String string3 = metaTag.getMetaContent();
                    if (string2 != null && string3 != null) {
                        if ((string2 = string2.toLowerCase()).equals("author")) {
                            this.author = string3;
                        } else if (string2.equals("description")) {
                            this.description = string3;
                        } else if (string2.equals("keywords")) {
                            StringTokenizer stringTokenizer = new StringTokenizer(string3, " ,\t", false);
                            while (stringTokenizer.hasMoreTokens()) {
                                String string4 = stringTokenizer.nextToken();
                                if (string4 == null) continue;
                                this.keywordBuffer.add(string4);
                            }
                        }
                    }
                } else if (XMP.equals(string) || PLAINTEXT.equals(string)) {
                    this.decodeText = false;
                }
            }
        }

        public void visitEndTag(Tag tag) {
            this.inTitleContext = false;
            String string = tag.getTagName();
            if (XMP.equals(string) || PLAINTEXT.equals(string)) {
                this.decodeText = true;
            }
        }
    }
}

