/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.utils.CharsetUtils;

/**
 * Encoding detector that looks for a {@code charset=...} declaration inside
 * the {@code <meta>} tags found in the first {@link #META_TAG_BUFFER_SIZE}
 * bytes of a stream.
 *
 * <p>The stream is marked before reading and reset afterwards, so the caller
 * is expected to pass a stream that supports {@code mark}/{@code reset}.</p>
 */
public class HtmlEncodingDetector
implements EncodingDetector {
    /** Number of leading bytes inspected; also used as the mark read-limit. */
    private static final int META_TAG_BUFFER_SIZE = 8192;

    /**
     * Matches an opening {@code <meta} tag (case-insensitive) and captures its
     * attribute text up to the next {@code <} or {@code >} in group 1.
     */
    private static final Pattern HTTP_META_PATTERN = Pattern.compile("(?is)<\\s*meta\\s+([^<>]+)");

    /**
     * Matches {@code charset = name} (case-insensitive), tolerating whitespace
     * around the equals sign and an optional opening quote; group 1 is the
     * charset name.
     */
    private static final Pattern FLEXIBLE_CHARSET_ATTR_PATTERN = Pattern.compile("(?is)charset\\s*=\\s*(?:['\\\"]\\s*)?([-_:\\.a-z0-9]+)");

    /** Charset used to decode the document head before pattern matching. */
    private static final Charset ASCII = Charset.forName("US-ASCII");

    /**
     * Scans the head of the given stream for a charset declared in a meta tag.
     *
     * @param input    stream to inspect; may be {@code null}
     * @param metadata not used by this implementation
     * @return the first declared charset that {@code CharsetUtils} reports as
     *         supported and can instantiate, or {@code null} if the input is
     *         {@code null} or no such declaration is found
     * @throws IOException if reading from or resetting the stream fails
     */
    public Charset detect(InputStream input, Metadata metadata) throws IOException {
        if (input == null) {
            return null;
        }

        // Read up to META_TAG_BUFFER_SIZE bytes, then rewind so the caller
        // sees the stream at its original position.
        input.mark(META_TAG_BUFFER_SIZE);
        byte[] buffer = new byte[META_TAG_BUFFER_SIZE];
        int n = 0;
        int m = input.read(buffer);
        while (m != -1 && n < buffer.length) {
            n += m;
            m = input.read(buffer, n, buffer.length - n);
        }
        input.reset();

        String head = ASCII.decode(ByteBuffer.wrap(buffer, 0, n)).toString();
        Matcher equiv = HTTP_META_PATTERN.matcher(head);
        // A single matcher is re-targeted at each meta tag's attribute text.
        Matcher charsetMatcher = FLEXIBLE_CHARSET_ATTR_PATTERN.matcher("");
        while (equiv.find()) {
            String attrs = equiv.group(1);
            charsetMatcher.reset(attrs);
            while (charsetMatcher.find()) {
                String candCharset = charsetMatcher.group(1);
                if (!CharsetUtils.isSupported(candCharset)) {
                    continue;
                }
                try {
                    return CharsetUtils.forName(candCharset);
                } catch (Exception ignored) {
                    // Best effort: the name passed isSupported() but could not
                    // be resolved, so fall through and try the next candidate.
                }
            }
        }
        return null;
    }
}

