/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler.extractor.impl;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.xml.xpath.XPathEvaluationResult;
import javax.xml.xpath.XPathException;
import javax.xml.xpath.XPathNodes;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor;
import org.codelibs.fess.crawler.util.XPathAPI;
import org.codelibs.nekohtml.parsers.DOMParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

/**
 * Extractor that parses HTML content into a DOM with the NekoHTML
 * {@link DOMParser} and pulls text out of it with XPath expressions.
 *
 * <p>The main text is selected by {@link #contentXpath}; additional metadata
 * values are selected by the expressions registered through
 * {@link #addMetadata(String, String)}. Parser features and properties can be
 * supplied through {@link #setFeatureMap(Map)} and
 * {@link #setPropertyMap(Map)}.</p>
 */
public class HtmlExtractor
extends AbstractXmlExtractor {
    /** Logger shared with subclasses. */
    protected static final Logger logger = LoggerFactory.getLogger(HtmlExtractor.class);

    /** Pattern returned by {@link #getEncodingPattern()}; group 1 captures the charset name of a {@code <meta>} tag. */
    protected Pattern metaCharsetPattern = Pattern.compile("<meta.*content\\s*=\\s*['\"].*;\\s*charset=([\\w\\d\\-_]*)['\"]\\s*/?>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    /** Pattern returned by {@link #getTagPattern()}; matches a single markup tag. */
    protected Pattern htmlTagPattern = Pattern.compile("<[^>]+>");

    /** Parser feature names mapped to {@code "true"} (case-insensitive) or any other value for {@code false}. */
    protected Map<String, String> featureMap = new HashMap<>();

    /** Parser property names mapped to the values passed to {@link DOMParser#setProperty}. */
    protected Map<String, String> propertyMap = new HashMap<>();

    /** XPath expression selecting the main content of the document. */
    protected String contentXpath = "//BODY";

    /** Metadata names mapped to the XPath expressions that produce their values. */
    protected Map<String, String> metadataXpathMap = new HashMap<>();

    /** Per-thread {@link XPathAPI}, created lazily and removed at the end of each extraction. */
    private final ThreadLocal<XPathAPI> xpathAPI = new ThreadLocal<>();

    /**
     * Builds the extracted data for the given HTML text.
     *
     * <p>If the content cannot be parsed, a warning is logged and the result of
     * {@code extractString(content)} is used as the content instead. Otherwise
     * the strings selected by {@link #contentXpath} are joined with a single
     * space, and every registered metadata XPath is evaluated and stored under
     * its name.</p>
     *
     * @param content the HTML text to extract from
     * @return the extracted content together with any metadata values
     */
    @Override
    protected ExtractData createExtractData(String content) {
        final DOMParser parser = getDomParser();
        try (StringReader reader = new StringReader(content)) {
            parser.parse(new InputSource(reader));
        } catch (final Exception e) {
            // Best effort: an unparsable document still yields the fallback text.
            logger.warn("Failed to parse the content.", e);
            return new ExtractData(extractString(content));
        }

        final Document document = parser.getDocument();
        try {
            final String[] contentParts = getStringsByXPath(document, contentXpath);
            final String joinedContent = StreamUtil.stream(contentParts).get(s -> s.collect(Collectors.joining(" ")));
            final ExtractData extractData = new ExtractData(joinedContent);
            for (final Map.Entry<String, String> entry : metadataXpathMap.entrySet()) {
                extractData.putValues(entry.getKey(), getStringsByXPath(document, entry.getValue()));
            }
            return extractData;
        } finally {
            // Drop the per-thread XPathAPI so it does not outlive this extraction.
            xpathAPI.remove();
        }
    }

    /**
     * Evaluates an XPath expression against the document and converts the
     * result to strings.
     *
     * <p>Boolean and number results are converted with {@code toString()},
     * string results are trimmed, a node yields its text content, and a node
     * set yields the text content of each node in order. Any other result type
     * yields its {@code toString()} value, or an empty string when the value is
     * {@code null}.</p>
     *
     * @param document the parsed document to query
     * @param path the XPath expression to evaluate
     * @return the resulting strings, or {@code StringUtil.EMPTY_STRINGS} when
     *         the evaluation fails with an {@link XPathException}
     */
    protected String[] getStringsByXPath(Document document, String path) {
        try {
            final XPathEvaluationResult<?> result = getXPathAPI().eval(document, path);
            switch (result.type()) {
                case BOOLEAN: {
                    final Boolean flag = (Boolean) result.value();
                    return new String[] { flag.toString() };
                }
                case NUMBER: {
                    final Number number = (Number) result.value();
                    return new String[] { number.toString() };
                }
                case STRING: {
                    final String text = (String) result.value();
                    return new String[] { text.trim() };
                }
                case NODESET: {
                    final XPathNodes nodes = (XPathNodes) result.value();
                    final String[] texts = new String[nodes.size()];
                    for (int i = 0; i < texts.length; i++) {
                        texts[i] = nodes.get(i).getTextContent();
                    }
                    return texts;
                }
                case NODE: {
                    final Node node = (Node) result.value();
                    return new String[] { node.getTextContent() };
                }
                default: {
                    final Object value = result.value();
                    return new String[] { value == null ? "" : value.toString() };
                }
            }
        } catch (final XPathException e) {
            logger.warn("Failed to parse the content by {}", path, e);
            return StringUtil.EMPTY_STRINGS;
        }
    }

    /**
     * Creates a new parser configured from {@link #featureMap} and
     * {@link #propertyMap}.
     *
     * @return a freshly configured parser
     * @throws CrawlerSystemException if applying a feature or property fails
     */
    protected DOMParser getDomParser() {
        final DOMParser parser = new DOMParser();
        try {
            for (final Map.Entry<String, String> feature : featureMap.entrySet()) {
                parser.setFeature(feature.getKey(), "true".equalsIgnoreCase(feature.getValue()));
            }
            for (final Map.Entry<String, String> property : propertyMap.entrySet()) {
                parser.setProperty(property.getKey(), property.getValue());
            }
        } catch (final Exception e) {
            throw new CrawlerSystemException("Invalid parser configuration.", e);
        }
        return parser;
    }

    /**
     * Returns the {@link XPathAPI} bound to the current thread, creating and
     * caching one if none is set yet.
     *
     * @return the current thread's XPath helper
     */
    protected XPathAPI getXPathAPI() {
        XPathAPI cached = xpathAPI.get();
        if (cached == null) {
            cached = new XPathAPI();
            xpathAPI.set(cached);
        }
        return cached;
    }

    /**
     * Registers an XPath expression whose result is stored as metadata.
     *
     * @param name the metadata name; an existing entry with this name is replaced
     * @param xpath the XPath expression that produces the metadata values
     */
    public void addMetadata(String name, String xpath) {
        metadataXpathMap.put(name, xpath);
    }

    /**
     * Returns the pattern used to find the encoding, which is
     * {@link #metaCharsetPattern}.
     *
     * @return the meta charset pattern
     */
    @Override
    protected Pattern getEncodingPattern() {
        return metaCharsetPattern;
    }

    /**
     * Returns the pattern used to find tags, which is {@link #htmlTagPattern}.
     *
     * @return the HTML tag pattern
     */
    @Override
    protected Pattern getTagPattern() {
        return htmlTagPattern;
    }

    /**
     * @return the pattern that captures the charset of a {@code <meta>} tag
     */
    public Pattern getMetaCharsetPattern() {
        return metaCharsetPattern;
    }

    /**
     * @param metaCharsetPattern the pattern that captures the charset of a {@code <meta>} tag
     */
    public void setMetaCharsetPattern(Pattern metaCharsetPattern) {
        this.metaCharsetPattern = metaCharsetPattern;
    }

    /**
     * @return the pattern that matches a markup tag
     */
    public Pattern getHtmlTagPattern() {
        return htmlTagPattern;
    }

    /**
     * @param htmlTagPattern the pattern that matches a markup tag
     */
    public void setHtmlTagPattern(Pattern htmlTagPattern) {
        this.htmlTagPattern = htmlTagPattern;
    }

    /**
     * @return the parser features applied by {@link #getDomParser()}
     */
    public Map<String, String> getFeatureMap() {
        return featureMap;
    }

    /**
     * @param featureMap the parser features to apply in {@link #getDomParser()}
     */
    public void setFeatureMap(Map<String, String> featureMap) {
        this.featureMap = featureMap;
    }

    /**
     * @return the parser properties applied by {@link #getDomParser()}
     */
    public Map<String, String> getPropertyMap() {
        return propertyMap;
    }

    /**
     * @param propertyMap the parser properties to apply in {@link #getDomParser()}
     */
    public void setPropertyMap(Map<String, String> propertyMap) {
        this.propertyMap = propertyMap;
    }
}

