package org.esigate.extension;

import java.nio.charset.Charset;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.esigate.Driver;
import org.esigate.events.Event;
import org.esigate.events.EventDefinition;
import org.esigate.events.EventManager;
import org.esigate.events.IEventListener;
import org.esigate.events.impl.ReadEntityEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:esigate-filter-1.0.2.jar:org/esigate/extension/HtmlCharsetProcessor.class */
/**
 * Extension that looks for a charset declaration in the {@code <meta>} tags of an HTML or XHTML
 * entity and, when that charset differs from the one currently attached to the read event,
 * re-decodes the raw entity bytes with the declared charset.
 *
 * <p>Only entities whose MIME type is {@code text/html} or {@code application/xhtml+xml} are
 * inspected. The declaration must appear between {@code <head>} and {@code </head>}.
 */
public class HtmlCharsetProcessor implements Extension, IEventListener {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlCharsetProcessor.class);

    /**
     * Flags shared by both patterns. DOTALL is required because the patterns are applied with
     * {@link Matcher#matches()} to the whole document: without it {@code .} stops at the first line
     * terminator and no multi-line page can ever match.
     */
    private static final int META_PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

    /** Matches a quoted charset value, e.g. {@code <meta charset="utf-8">}. */
    private static final Pattern PATTERN_META_HTML5 =
            Pattern.compile(".*<head>.*<meta[^>]+charset=\"([^>^\"]+)\"[^>]*/?>.*</head>.*", META_PATTERN_FLAGS);

    /**
     * Matches an unquoted charset value terminated by a double quote, e.g.
     * {@code <meta http-equiv="Content-Type" content="text/html; charset=utf-8">}.
     */
    private static final Pattern PATTERN_META_HTML4_XHTML =
            Pattern.compile(".*<head>.*<meta[^>]+charset=([^>^\"]+)\"[^>]*/?>.*</head>.*", META_PATTERN_FLAGS);

    /**
     * Handles a read-entity event: detects the charset declared inside the HTML content and, if it
     * differs from the event's current charset, replaces the entity content with the raw bytes
     * decoded using the declared charset.
     *
     * @param eventDefinition the definition of the fired event (not used here)
     * @param event the fired event; it is cast to {@link ReadEntityEvent}
     * @return always {@code true}
     */
    @Override // org.esigate.events.IEventListener
    public boolean event(EventDefinition eventDefinition, Event event) {
        ReadEntityEvent readEntityEvent = (ReadEntityEvent) event;
        Charset charset = null;
        LOG.debug("Content mime type is {}", readEntityEvent.getMimeType());
        if ("text/html".equals(readEntityEvent.getMimeType())
                || "application/xhtml+xml".equals(readEntityEvent.getMimeType())) {
            LOG.debug("Supported MIME type, parsing content");
            charset = detectCharset(readEntityEvent.getEntityContent());
        }
        // Nothing declared (or nothing usable), or the content is already decoded correctly.
        if (charset == null || charset.equals(readEntityEvent.getCharset())) {
            return true;
        }
        LOG.debug("Changing charset fom {} to {}", readEntityEvent.getCharset(), charset);
        readEntityEvent.setEntityContent(new String(readEntityEvent.getRawEntityContent(), charset));
        return true;
    }

    /**
     * Registers this instance as a listener for {@link EventManager#EVENT_READ_ENTITY} on the
     * driver's event manager.
     *
     * @param driver the driver whose event manager receives the registration
     * @param properties configuration properties (not used here)
     */
    @Override // org.esigate.extension.Extension
    public void init(Driver driver, Properties properties) {
        driver.getEventManager().register(EventManager.EVENT_READ_ENTITY, this);
    }

    /**
     * Searches the content for a charset declaration. The HTML5 form is tried first, then the
     * HTML4/XHTML form; when both yield a usable charset, the HTML4/XHTML one wins.
     *
     * @param content the decoded entity content, may be {@code null}
     * @return the declared charset, or {@code null} if none was found or none is usable
     */
    private static Charset detectCharset(CharSequence content) {
        if (content == null) {
            return null;
        }
        Charset detected = null;
        Matcher html5Matcher = PATTERN_META_HTML5.matcher(content);
        if (html5Matcher.matches()) {
            LOG.debug("Found HTML5 charset");
            detected = lookupCharset(html5Matcher.group(1));
        }
        Matcher html4Matcher = PATTERN_META_HTML4_XHTML.matcher(content);
        if (html4Matcher.matches()) {
            LOG.debug("Found HTML/XHTML charset");
            Charset html4Charset = lookupCharset(html4Matcher.group(1));
            // Keep an earlier valid result when this declaration cannot be resolved.
            if (html4Charset != null) {
                detected = html4Charset;
            }
        }
        return detected;
    }

    /**
     * Resolves a charset name taken from the document, ignoring surrounding whitespace.
     *
     * @param name the charset name captured from the {@code <meta>} tag
     * @return the matching charset, or {@code null} if the name is illegal or not supported
     */
    private static Charset lookupCharset(String name) {
        try {
            return Charset.forName(name.trim());
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException and UnsupportedCharsetException: a bogus
            // declaration in the page must not abort processing of the entity.
            LOG.warn("Ignoring invalid or unsupported charset declared in HTML content: " + name, e);
            return null;
        }
    }
}
