/*
 * Decompiled with CFR 0.152.
 */
package org.exoplatform.services.document.impl;

import com.lowagie.text.pdf.PdfDate;
import com.lowagie.text.pdf.PdfReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Properties;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.commons.logging.Log;
import org.exoplatform.commons.utils.ISO8601;
import org.exoplatform.services.document.DCMetaData;
import org.exoplatform.services.document.impl.BaseDocumentReader;
import org.exoplatform.services.log.ExoLogger;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Document reader for {@code application/pdf} content.
 *
 * <p>Text extraction is delegated to PDFBox ({@link PDDocument} / {@link PDFTextStripper});
 * metadata extraction is delegated to iText ({@link PdfReader}). Metadata is taken from the
 * XMP packet when it declares PDF/A-1 conformance, and from the classic Info dictionary
 * otherwise.
 *
 * <p>None of the methods close the {@link InputStream} handed to them explicitly; only the
 * PDF library objects created here are closed.
 */
public class PDFDocumentReader
extends BaseDocumentReader {
    protected static Log log = ExoLogger.getLogger("platform.PDFDocumentReader");

    /** SAX feature controlling resolution of external general entities. */
    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";

    /** SAX feature controlling resolution of external parameter entities. */
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";

    /** Xerces feature controlling whether an external DTD is fetched by a non-validating parser. */
    private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    /**
     * Returns the MIME types handled by this reader.
     *
     * @return a fresh single-element array containing {@code "application/pdf"}
     */
    public String[] getMimeTypes() {
        return new String[]{"application/pdf"};
    }

    /**
     * Extracts the text of every page of the PDF read from the given stream.
     *
     * @param is stream positioned at the start of a PDF document
     * @return the text written by {@link PDFTextStripper} for pages 1 to the last page
     * @throws Exception if the document cannot be loaded or its text cannot be extracted
     */
    public String getContentAsText(InputStream is) throws Exception {
        PDDocument pdDocument = PDDocument.load((InputStream)is);
        try {
            StringWriter sw = new StringWriter();
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setStartPage(1);
            // Integer.MAX_VALUE as the end page means "up to the last page".
            stripper.setEndPage(Integer.MAX_VALUE);
            stripper.writeText(pdDocument, (Writer)sw);
            return sw.toString();
        }
        finally {
            // Always release the loaded document, even when stripping fails.
            pdDocument.close();
        }
    }

    /**
     * Extracts the text of the PDF read from the given stream.
     *
     * <p>The {@code encoding} argument is ignored; this simply delegates to
     * {@link #getContentAsText(InputStream)}.
     *
     * @param is stream positioned at the start of a PDF document
     * @param encoding unused
     * @return the extracted text
     * @throws Exception if the document cannot be loaded or its text cannot be extracted
     */
    public String getContentAsText(InputStream is, String encoding) throws Exception {
        return this.getContentAsText(is);
    }

    /**
     * Reads document metadata from the PDF in the given stream.
     *
     * <p>If the document carries an XMP metadata packet that declares PDF/A-1 conformance,
     * the properties are taken from that packet; otherwise they are taken from the Info
     * dictionary.
     *
     * @param is stream positioned at the start of a PDF document
     * @return the extracted properties (possibly empty)
     * @throws Exception if the PDF cannot be read or its XMP packet cannot be parsed
     */
    public Properties getProperties(InputStream is) throws Exception {
        // An empty owner password is supplied, as in the original "".getBytes() call.
        PdfReader reader = new PdfReader(is, new byte[0]);
        try {
            Properties props = null;
            byte[] metadata = reader.getMetadata();
            if (metadata != null) {
                props = this.getPropertiesFromMetadata(metadata);
            }
            if (props == null) {
                props = this.getPropertiesFromInfo(reader.getInfo());
            }
            return props;
        }
        finally {
            // Close the reader on every path; previously an exception above leaked it.
            reader.close();
        }
    }

    /**
     * Parses an XMP metadata packet and, when it declares PDF/A-1 conformance, extracts the
     * Dublin Core properties from it.
     *
     * <p>The conformance string is built as {@code <pdfaid:conformance>-<pdfaid:part>} from
     * the first element of each name and compared case-insensitively with {@code "A-1"}.
     *
     * @param metadata raw bytes of the XMP packet
     * @return the extracted properties, or {@code null} when the packet does not declare
     *         {@code A-1} conformance
     * @throws Exception if the parser cannot be configured or the packet is not well-formed XML
     */
    protected Properties getPropertiesFromMetadata(byte[] metadata) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        // The packet comes from an arbitrary PDF, so never let the parser resolve external
        // entities or fetch an external DTD (XXE / SSRF hardening).
        dbf.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
        dbf.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
        dbf.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
        DocumentBuilder docBuilder = dbf.newDocumentBuilder();
        Document doc = docBuilder.parse(new ByteArrayInputStream(metadata));

        StringBuilder version = new StringBuilder();
        Node conformance = firstElementNamed(doc, "pdfaid:conformance");
        if (conformance != null) {
            version.append(conformance.getTextContent()).append("-");
        }
        Node part = firstElementNamed(doc, "pdfaid:part");
        if (part != null) {
            version.append(part.getTextContent());
        }
        if (version.toString().equalsIgnoreCase("A-1")) {
            return this.getPropsFromPDFAMetadata(doc);
        }
        return null;
    }

    /**
     * Builds properties from the entries of a PDF Info dictionary.
     *
     * <p>{@code Title}, {@code Author} and {@code Subject} map to {@code DCMetaData.TITLE},
     * {@code CREATOR} and {@code SUBJECT}. {@code CreationDate} and {@code ModDate} are both
     * stored under {@code DCMetaData.DATE}; when both decode successfully the modification
     * date wins because it is stored last.
     *
     * @param info Info dictionary as returned by the PDF reader; values are cast to
     *        {@code String}
     * @return the extracted properties, never {@code null}
     * @throws Exception declared for subclasses; not thrown by the code here except for
     *         runtime failures such as a non-String value in {@code info}
     */
    protected Properties getPropertiesFromInfo(HashMap info) throws Exception {
        Properties props = new Properties();
        if (info == null) {
            return props;
        }
        putIfPresent(props, DCMetaData.TITLE, (String)info.get("Title"));
        putIfPresent(props, DCMetaData.CREATOR, (String)info.get("Author"));
        putIfPresent(props, DCMetaData.SUBJECT, (String)info.get("Subject"));
        // Order matters: ModDate is applied after CreationDate and therefore overrides it.
        putDecodedPdfDate(props, (String)info.get("CreationDate"));
        putDecodedPdfDate(props, (String)info.get("ModDate"));
        return props;
    }

    /**
     * Extracts Dublin Core properties from a parsed PDF/A XMP document.
     *
     * <p>Each {@code rdf:li} element whose grandparent is {@code dc:title},
     * {@code dc:creator} or {@code dc:description} contributes the text of its last child as
     * {@code TITLE}, {@code CREATOR} or {@code SUBJECT} respectively (later items overwrite
     * earlier ones). {@code xmp:CreateDate} and then {@code xmp:ModifyDate} are parsed as
     * ISO 8601 and stored under {@code DATE}, so the modification date wins when both exist.
     *
     * @param doc parsed XMP document
     * @return the extracted properties, never {@code null}
     * @throws Exception if a date value cannot be parsed
     */
    private Properties getPropsFromPDFAMetadata(Document doc) throws Exception {
        Properties props = new Properties();
        NodeList items = doc.getElementsByTagName("rdf:li");
        int count = items == null ? 0 : items.getLength();
        for (int i = 0; i < count; ++i) {
            Node item = items.item(i);
            Object key = dublinCoreKeyFor(item);
            // An empty <rdf:li/> has no child; skip it instead of failing with an NPE.
            Node value = item.getLastChild();
            if (key != null && value != null) {
                props.put(key, value.getTextContent());
            }
        }
        putIso8601Date(props, doc, "xmp:CreateDate");
        putIso8601Date(props, doc, "xmp:ModifyDate");
        return props;
    }

    /** Returns the first element with the given tag name, or {@code null} if there is none. */
    private static Node firstElementNamed(Document doc, String tagName) {
        NodeList list = doc.getElementsByTagName(tagName);
        return list == null ? null : list.item(0);
    }

    /** Stores {@code value} under {@code key} unless the value is {@code null}. */
    private static void putIfPresent(Properties props, Object key, String value) {
        if (value != null) {
            props.put(key, value);
        }
    }

    /**
     * Decodes a PDF date string and stores it under {@code DCMetaData.DATE}. Nothing is
     * stored when the string is absent or when {@code PdfDate.decode} yields {@code null}
     * (which would otherwise make {@code Properties.put} throw).
     */
    private static void putDecodedPdfDate(Properties props, String pdfDate) {
        if (pdfDate == null) {
            return;
        }
        Calendar decoded = PdfDate.decode(pdfDate);
        if (decoded != null) {
            props.put(DCMetaData.DATE, decoded);
        }
    }

    /**
     * Maps an {@code rdf:li} node to the metadata key selected by the name of its
     * grandparent element, or {@code null} when the grandparent is missing or not one of
     * the handled Dublin Core elements.
     */
    private static Object dublinCoreKeyFor(Node item) {
        Node parent = item.getParentNode();
        Node grandParent = parent == null ? null : parent.getParentNode();
        if (grandParent == null) {
            return null;
        }
        String name = grandParent.getNodeName();
        if ("dc:title".equals(name)) {
            return DCMetaData.TITLE;
        }
        if ("dc:creator".equals(name)) {
            return DCMetaData.CREATOR;
        }
        if ("dc:description".equals(name)) {
            return DCMetaData.SUBJECT;
        }
        return null;
    }

    /**
     * Parses the text of the last child of the first {@code tagName} element as ISO 8601 and
     * stores the result under {@code DCMetaData.DATE}. Does nothing when the element or its
     * child is missing, or when the parser yields {@code null}.
     */
    private static void putIso8601Date(Properties props, Document doc, String tagName) throws Exception {
        Node element = firstElementNamed(doc, tagName);
        Node valueNode = element == null ? null : element.getLastChild();
        if (valueNode == null) {
            return;
        }
        Calendar parsed = ISO8601.parseEx(valueNode.getTextContent());
        if (parsed != null) {
            props.put(DCMetaData.DATE, parsed);
        }
    }
}

