/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.mbox;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Collections;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Parser for mbox mail archives ({@code application/mbox}).
 *
 * <p>The input is read line by line as US-ASCII. A line starting with
 * {@link #MBOX_RECORD_DIVIDER} begins a new message; the header lines that
 * follow run until the first empty line, after which everything up to the
 * next divider is message content.
 *
 * <p>Only the headers of the <em>first</em> message are copied into the
 * {@link Metadata} object. The content of every message is emitted as XHTML:
 * one {@code <div class="email-entry"><p>...</p></div>} per message, each
 * content line followed by a {@code <br>}, and runs of lines starting with
 * {@code >} wrapped in {@code <q>}.
 */
public class MboxParser implements Parser {
    private static final Logger LOGGER = Logger.getLogger(MboxParser.class);
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("mbox"));

    /** MIME type written to the {@code Content-Type} metadata entry. */
    public static final String MBOX_MIME_TYPE = "application/mbox";

    /** Prefix of the line that starts a new message in the archive. */
    public static final String MBOX_RECORD_DIVIDER = "From ";

    /** Splits a header line into its tag (group 1) and its content (group 2). */
    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");

    /** Prefix of the metadata key used for headers without a dedicated mapping. */
    private static final String EMAIL_HEADER_METADATA_PREFIX = MboxParser.class.getSimpleName() + "-";

    /** Metadata key under which the remainder of the "From " divider line is stored. */
    private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";

    /**
     * Returns the media types handled by this parser.
     *
     * @param context parse context (not used)
     * @return a singleton set containing {@code application/mbox}
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses an mbox stream, recording the first message's headers in
     * {@code metadata} and emitting the content of all messages as XHTML.
     *
     * @param stream   mbox data; decoded as US-ASCII
     * @param handler  receiver of the generated XHTML SAX events
     * @param metadata receives {@code Content-Type}, {@code Content-Encoding}
     *                 and the headers of the first message
     * @param context  parse context (not used)
     * @throws IOException  if reading from {@code stream} fails
     * @throws SAXException if {@code handler} rejects an event
     * @throws TikaException declared by the interface; not thrown directly here
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
            throws IOException, TikaException, SAXException {
        InputStreamReader isr;
        try {
            isr = new InputStreamReader(stream, "us-ascii");
        } catch (UnsupportedEncodingException e) {
            LOGGER.error("Unexpected exception setting up MboxParser", e);
            // Fall back to the platform default charset rather than failing.
            isr = new InputStreamReader(stream);
        }
        // The reader is not closed here, since that would also close the
        // caller-supplied stream.
        BufferedReader reader = new BufferedReader(isr);

        metadata.set("Content-Type", MBOX_MIME_TYPE);
        metadata.set("Content-Encoding", "us-ascii");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        ParseStates parseState = ParseStates.START;
        // Header line currently being assembled (may grow via continuation lines).
        String multiLine = null;
        boolean inQuote = false;
        // 1-based index of the message the current line belongs to.
        int numEmails = 0;

        for (String curLine = reader.readLine(); curLine != null; curLine = reader.readLine()) {
            boolean newMessage = curLine.startsWith(MBOX_RECORD_DIVIDER);
            if (newMessage) {
                ++numEmails;
            }

            switch (parseState) {
                case START:
                    // Everything before the first divider line is ignored.
                    if (newMessage) {
                        parseState = ParseStates.IN_HEADER;
                        multiLine = curLine;
                    }
                    break;

                case IN_HEADER:
                    if (newMessage) {
                        // The pending header belongs to the PREVIOUS message;
                        // numEmails has already been advanced for the new one.
                        saveHeaderInMetadata(numEmails - 1, metadata, multiLine);
                        multiLine = curLine;
                    } else if (curLine.length() == 0) {
                        // An empty line ends the header section.
                        saveHeaderInMetadata(numEmails, metadata, multiLine);
                        parseState = ParseStates.IN_CONTENT;
                        xhtml.startElement("div", "class", "email-entry");
                        xhtml.startElement("p");
                        inQuote = false;
                    } else if (curLine.startsWith(" ") || curLine.startsWith("\t")) {
                        // Continuation of the previous (folded) header line.
                        multiLine = multiLine + " " + curLine.trim();
                    } else {
                        saveHeaderInMetadata(numEmails, metadata, multiLine);
                        multiLine = curLine;
                    }
                    break;

                case IN_CONTENT:
                    if (newMessage) {
                        endMessage(xhtml, inQuote);
                        parseState = ParseStates.IN_HEADER;
                        multiLine = curLine;
                    } else {
                        boolean quoted = curLine.startsWith(">");
                        if (inQuote && !quoted) {
                            xhtml.endElement("q");
                            inQuote = false;
                        } else if (!inQuote && quoted) {
                            xhtml.startElement("q");
                            inQuote = true;
                        }
                        xhtml.characters(curLine);
                        xhtml.element("br", "");
                    }
                    break;
            }
        }

        // Flush whatever was still pending when the input ended.
        if (parseState == ParseStates.IN_HEADER) {
            saveHeaderInMetadata(numEmails, metadata, multiLine);
        } else if (parseState == ParseStates.IN_CONTENT) {
            endMessage(xhtml, inQuote);
        }
        xhtml.endDocument();
    }

    /**
     * Closes the XHTML elements opened for one message.
     *
     * @param xhtml   handler the message content was written to
     * @param inQuote whether a {@code <q>} element is still open
     * @throws SAXException if the handler rejects an event
     */
    private void endMessage(XHTMLContentHandler xhtml, boolean inQuote) throws SAXException {
        if (inQuote) {
            xhtml.endElement("q");
        }
        xhtml.endElement("p");
        xhtml.endElement("div");
    }

    /**
     * Records one complete header line in the metadata.
     *
     * <p>Nothing is recorded when {@code curLine} is {@code null} or when the
     * header belongs to any message other than the first. A line that does
     * not look like {@code tag: content} is logged and skipped.
     *
     * @param numEmails 1-based index of the message the header belongs to
     * @param metadata  destination for the header value
     * @param curLine   the header line, with continuation lines already
     *                  appended, or the "From " divider line; may be {@code null}
     */
    private void saveHeaderInMetadata(int numEmails, Metadata metadata, String curLine) {
        if (curLine == null || numEmails > 1) {
            return;
        }
        if (curLine.startsWith(MBOX_RECORD_DIVIDER)) {
            metadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
            return;
        }
        Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
        if (!headerMatcher.matches()) {
            LOGGER.warn("Malformed email header in mbox file: " + curLine);
            return;
        }
        // Use a fixed locale so the resulting metadata keys do not depend on
        // the JVM default locale (e.g. 'I' lowercases to a dotless i in Turkish).
        String headerTag = headerMatcher.group(1).toLowerCase(Locale.ENGLISH);
        String headerContent = headerMatcher.group(2);
        if (headerTag.equals("from")) {
            metadata.add("Author", headerContent);
            metadata.add("creator", headerContent);
        } else if (headerTag.equals("subject")) {
            metadata.add("subject", headerContent);
            metadata.add("title", headerContent);
        } else if (headerTag.equals("date")) {
            metadata.add("date", headerContent);
        } else if (headerTag.equals("message-id")) {
            metadata.add("identifier", headerContent);
        } else if (headerTag.equals("in-reply-to")) {
            metadata.add("relation", headerContent);
        } else if (headerTag.equals("content-type")) {
            metadata.add("Content-Type", headerContent);
            metadata.add("format", headerContent);
        } else {
            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
        }
    }

    /**
     * Convenience overload that parses with a fresh, empty {@link ParseContext}.
     *
     * @param stream   mbox data
     * @param handler  receiver of the generated XHTML SAX events
     * @param metadata receives the extracted metadata
     * @throws IOException   if reading from {@code stream} fails
     * @throws SAXException  if {@code handler} rejects an event
     * @throws TikaException declared by the delegate; see the four-argument overload
     */
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException, SAXException, TikaException {
        this.parse(stream, handler, metadata, new ParseContext());
    }

    /** Position of the line-oriented parser within the mbox stream. */
    private enum ParseStates {
        /** Before the first "From " divider line has been seen. */
        START,
        /** Inside the header section of a message. */
        IN_HEADER,
        /** Inside the content of a message, after the blank separator line. */
        IN_CONTENT;
    }
}

