/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler.transformer.impl;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Resource;
import javax.xml.xpath.XPathException;
import javax.xml.xpath.XPathNodes;
import org.codelibs.core.io.InputStreamUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.crawler.Constants;
import org.codelibs.fess.crawler.builder.RequestDataBuilder;
import org.codelibs.fess.crawler.container.CrawlerContainer;
import org.codelibs.fess.crawler.entity.AccessResultData;
import org.codelibs.fess.crawler.entity.RequestData;
import org.codelibs.fess.crawler.entity.ResponseData;
import org.codelibs.fess.crawler.entity.ResultData;
import org.codelibs.fess.crawler.exception.CrawlerSystemException;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.helper.EncodingHelper;
import org.codelibs.fess.crawler.helper.UrlConvertHelper;
import org.codelibs.fess.crawler.transformer.impl.AbstractTransformer;
import org.codelibs.fess.crawler.util.CharUtil;
import org.codelibs.fess.crawler.util.XPathAPI;
import org.codelibs.nekohtml.parsers.DOMParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

/**
 * Transformer that stores the raw response body in a {@link ResultData} and, for
 * HTML/XHTML responses that are not marked no-follow, collects child URLs from the
 * tag/attribute rules registered via {@link #addChildUrlRule(String, String)}.
 *
 * <p>The character set of the response is sniffed from the first
 * {@link #getPreloadSizeForCharset()} bytes of the body before anything else is done.</p>
 */
public class HtmlTransformer extends AbstractTransformer {
    private static final Logger logger = LoggerFactory.getLogger(HtmlTransformer.class);

    /** Metadata key whose value, when it is a string, is queued as an additional URL. */
    protected static final String LOCATION_HEADER = "Location";

    /** Charset name used whenever no usable charset can be determined. */
    private static final String FALLBACK_CHARSET_NAME = "UTF-8";

    /** Matches a {@code ; charset=xxx} declaration (case-insensitive); group 1 is the charset name. */
    private static final Pattern CHARSET_PATTERN = Pattern.compile("; *charset *= *([a-zA-Z0-9\\-_]+)", Pattern.CASE_INSENSITIVE);

    /** Matches a {@code ;jsessionid=...} path parameter. */
    private static final Pattern JSESSIONID_PATTERN = Pattern.compile(";jsessionid=[a-zA-Z0-9\\.]*");

    /** Matches a {@code /segment/../} sequence that can be collapsed to a single slash. */
    private static final Pattern PARENT_SEGMENT_PATTERN = Pattern.compile("/[^/]+/\\.\\./");

    /** Matches a run of slashes that is not directly preceded by a colon. */
    private static final Pattern REPEATED_SLASH_PATTERN = Pattern.compile("([^:])/+");

    @Resource
    protected CrawlerContainer crawlerContainer;

    /** Parser feature name -> "true"/"false" (anything other than "true", ignoring case, is false). */
    protected Map<String, String> featureMap = new HashMap<>();

    /** Parser property name -> property value. */
    protected Map<String, String> propertyMap = new HashMap<>();

    /** XPath expression -> attribute name; iterated in insertion order. */
    protected Map<String, String> childUrlRuleMap = new LinkedHashMap<>();

    protected String defaultEncoding;

    /** Number of leading body bytes inspected when looking for a charset declaration. */
    protected int preloadSizeForCharset = 2048;

    /** Attribute values matching this pattern are never turned into child URLs. */
    protected Pattern invalidUrlPattern = Pattern.compile(
            "^\\s*javascript:|^\\s*mailto:|^\\s*irc:|^\\s*skype:|^\\s*about:|^\\s*fscommand:|^\\s*aim:|^\\s*msnim:|^\\s*news:|^\\s*tel:|^\\s*unsaved:|^\\s*data:|^\\s*android-app:|^\\s*ios-app:|^\\s*callto:",
            Pattern.CASE_INSENSITIVE);

    /** Per-thread XPath helper; cleared at the end of every {@link #transform(ResponseData)} call. */
    private final ThreadLocal<XPathAPI> xpathAPI = new ThreadLocal<>();

    /**
     * Builds the result for a response: detects the charset, stores the body, extracts
     * child URLs for followable HTML, and queues the {@code Location} metadata value if
     * it is a string.
     *
     * @param responseData the response to transform
     * @return the populated result
     * @throws CrawlingAccessException if {@code responseData} is null or has no body
     * @throws CrawlerSystemException if the body cannot be read or parsed
     */
    @Override
    public ResultData transform(final ResponseData responseData) {
        if (responseData == null || !responseData.hasResponseBody()) {
            throw new CrawlingAccessException("No response body.");
        }

        updateCharset(responseData);

        final ResultData resultData = new ResultData();
        resultData.setTransformerName(getName());

        try {
            storeData(responseData, resultData);
            if (isHtml(responseData) && !responseData.isNoFollow()) {
                storeChildUrls(responseData, resultData);
            }
        } finally {
            // Drop the thread-local helper so it does not outlive this call.
            xpathAPI.remove();
        }

        final Object redirectUrlObj = responseData.getMetaDataMap().get(LOCATION_HEADER);
        if (redirectUrlObj instanceof String) {
            final UrlConvertHelper urlConvertHelper = (UrlConvertHelper) crawlerContainer.getComponent("urlConvertHelper");
            final String redirectUrl = urlConvertHelper.convert(redirectUrlObj.toString());
            resultData.addUrl(RequestDataBuilder.newRequestData().get().url(redirectUrl).build());
        }
        return resultData;
    }

    /**
     * @param responseData the response to inspect
     * @return {@code true} if the MIME type is {@code text/html} or {@code application/xhtml+xml}
     */
    protected boolean isHtml(final ResponseData responseData) {
        final String mimeType = responseData.getMimeType();
        return "text/html".equals(mimeType) || "application/xhtml+xml".equals(mimeType);
    }

    /**
     * Registers a rule for child URL extraction. Blank arguments are ignored.
     *
     * @param tagName XPath expression selecting the nodes to inspect
     * @param attrName name of the attribute holding the URL
     */
    public void addChildUrlRule(final String tagName, final String attrName) {
        if (StringUtil.isNotBlank(tagName) && StringUtil.isNotBlank(attrName)) {
            childUrlRuleMap.put(tagName, attrName);
        }
    }

    /**
     * @return the XPath helper bound to the current thread, created on first use
     */
    protected XPathAPI getXPathAPI() {
        XPathAPI cachedXPathAPI = xpathAPI.get();
        if (cachedXPathAPI == null) {
            cachedXPathAPI = new XPathAPI();
            xpathAPI.set(cachedXPathAPI);
        }
        return cachedXPathAPI;
    }

    /**
     * Parses the body as HTML, resolves every URL found by the registered rules against
     * the document's base URL, and adds them (plus the response's own child URL set) to
     * the result. The request's own URL and its trailing-slash variant are then removed.
     *
     * @param responseData the response whose body is parsed
     * @param resultData the result receiving the URLs
     * @throws CrawlerSystemException if reading or parsing fails
     */
    protected void storeChildUrls(final ResponseData responseData, final ResultData resultData) {
        List<RequestData> requestDataList = new ArrayList<>();
        try (InputStream is = responseData.getResponseBody()) {
            final DOMParser parser = getDomParser();
            parser.parse(new InputSource(is));
            final Document document = parser.getDocument();

            // Prefer <base href>; fall back to the response URL when it is absent or malformed.
            final String baseHref = getBaseHref(document);
            URL url;
            try {
                url = new URL(baseHref == null ? responseData.getUrl() : baseHref);
            } catch (final MalformedURLException e) {
                url = new URL(responseData.getUrl());
            }

            for (final Map.Entry<String, String> rule : childUrlRuleMap.entrySet()) {
                final List<String> childUrls =
                        getUrlFromTagAttribute(url, document, rule.getKey(), rule.getValue(), responseData.getCharSet());
                for (final String childUrl : childUrls) {
                    requestDataList.add(RequestDataBuilder.newRequestData().get().url(childUrl).build());
                }
            }
            requestDataList = convertChildUrlList(requestDataList);
            resultData.addAllUrl(requestDataList);
            resultData.addAllUrl(responseData.getChildUrlSet());

            // Do not re-queue the page itself, with or without a trailing slash.
            final RequestData requestData = responseData.getRequestData();
            resultData.removeUrl(requestData);
            resultData.removeUrl(getDuplicateUrl(requestData));
        } catch (final CrawlerSystemException e) {
            throw e;
        } catch (final Exception e) {
            throw new CrawlerSystemException("Could not store data.", e);
        }
    }

    /**
     * Passes each URL in the list through the {@code urlConvertHelper} component.
     * Best-effort: if the helper cannot be obtained or fails, the list is returned as it
     * stands at that point (entries converted before the failure stay converted).
     *
     * @param requestDataList the requests to convert in place
     * @return the same list instance
     */
    protected List<RequestData> convertChildUrlList(final List<RequestData> requestDataList) {
        try {
            final UrlConvertHelper urlConvertHelper = (UrlConvertHelper) crawlerContainer.getComponent("urlConvertHelper");
            for (final RequestData requestData : requestDataList) {
                requestData.setUrl(urlConvertHelper.convert(requestData.getUrl()));
            }
        } catch (final Exception e) {
            // Conversion is optional; keep the URLs and leave a trace for diagnosis.
            if (logger.isDebugEnabled()) {
                logger.debug("Could not convert child urls.", e);
            }
        }
        return requestDataList;
    }

    /**
     * Copies the full response body and the response charset into the result.
     *
     * @param responseData the response to read
     * @param resultData the result to populate
     * @throws CrawlerSystemException if the body cannot be read
     */
    protected void storeData(final ResponseData responseData, final ResultData resultData) {
        try (InputStream is = responseData.getResponseBody()) {
            resultData.setData(InputStreamUtil.getBytes(is));
            resultData.setEncoding(responseData.getCharSet());
        } catch (final CrawlerSystemException e) {
            throw e;
        } catch (final Exception e) {
            throw new CrawlerSystemException("Could not store data.", e);
        }
    }

    /**
     * Determines the charset of the response and stores it on {@code responseData}.
     *
     * <ul>
     * <li>A charset declared in the body wins.</li>
     * <li>Otherwise, with no {@link #defaultEncoding} configured, UTF-8 is set.</li>
     * <li>Otherwise the default encoding is used only if the response has no charset yet.</li>
     * <li>Finally, a charset the JVM does not support is replaced by UTF-8.</li>
     * </ul>
     *
     * <p>NOTE(review): when nothing is declared in the body and no default encoding is
     * configured, an existing charset on the response is overwritten with UTF-8, whereas
     * a configured default preserves it. Confirm this asymmetry is intended.</p>
     *
     * @param responseData the response to update
     * @throws CrawlerSystemException if the body cannot be read
     */
    protected void updateCharset(final ResponseData responseData) {
        try (InputStream is = responseData.getResponseBody()) {
            final String encoding = loadCharset(is);
            if (encoding != null) {
                responseData.setCharSet(encoding.trim());
            } else if (defaultEncoding == null) {
                responseData.setCharSet(FALLBACK_CHARSET_NAME);
            } else if (responseData.getCharSet() == null) {
                responseData.setCharSet(defaultEncoding);
            }

            if (!isSupportedCharset(responseData.getCharSet())) {
                responseData.setCharSet(FALLBACK_CHARSET_NAME);
            }
        } catch (final CrawlerSystemException e) {
            throw e;
        } catch (final Exception e) {
            throw new CrawlerSystemException("Could not load response data: " + responseData.getUrl(), e);
        }
    }

    /**
     * @param charsetName the charset name to check; may be null
     * @return {@code true} if {@link Charset#forName(String)} accepts the name
     */
    protected boolean isSupportedCharset(final String charsetName) {
        if (charsetName == null) {
            return false;
        }
        try {
            Charset.forName(charsetName);
            return true;
        } catch (final Exception e) {
            return false;
        }
    }

    /**
     * Reads up to {@link #preloadSizeForCharset} bytes from the stream and looks for a
     * charset declaration in them. The stream is not closed here.
     *
     * @param inputStream the body stream, positioned at its start
     * @return the declared charset after {@link #normalizeEncoding(String)}; null when
     *         none was found and normalization leaves null unchanged
     * @throws CrawlingAccessException if reading fails
     */
    protected String loadCharset(final InputStream inputStream) {
        String encoding = null;
        try {
            final BufferedInputStream bis = new BufferedInputStream(inputStream);
            final byte[] buffer = new byte[preloadSizeForCharset];
            // readNBytes keeps reading until the buffer is full or the stream ends, so a
            // short first read cannot hide a declaration that lies within the preload window.
            final int size = bis.readNBytes(buffer, 0, buffer.length);
            if (size > 0) {
                // ISO-8859-1 maps every byte to one char, so the ASCII declaration is
                // recognised independently of the JVM's default charset.
                final String content = new String(buffer, 0, size, StandardCharsets.ISO_8859_1);
                encoding = parseCharset(content);
            }
        } catch (final IOException e) {
            throw new CrawlingAccessException("Could not load a content.", e);
        }
        return normalizeEncoding(encoding);
    }

    /**
     * Passes the encoding through the {@code encodingHelper} component. Best-effort: if
     * the helper cannot be obtained or fails, the input is returned unchanged.
     *
     * @param encoding the encoding name; may be null
     * @return the helper's result, or {@code encoding} itself on failure
     */
    protected String normalizeEncoding(final String encoding) {
        try {
            final EncodingHelper encodingHelper = (EncodingHelper) crawlerContainer.getComponent("encodingHelper");
            return encodingHelper.normalize(encoding);
        } catch (final Exception e) {
            // Normalization is optional; fall back to the raw name.
            if (logger.isDebugEnabled()) {
                logger.debug("Could not normalize encoding: {}", encoding, e);
            }
            return encoding;
        }
    }

    /**
     * Finds the first {@code ; charset=NAME} declaration in the text (case-insensitive).
     *
     * @param content the text to search; may be null
     * @return the charset name, or null if there is no such declaration
     */
    protected String parseCharset(final String content) {
        if (content == null) {
            return null;
        }
        final Matcher matcher = CHARSET_PATTERN.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Toggles the trailing slash on the request's URL: removes it if present, appends it
     * otherwise.
     *
     * <p>NOTE(review): this modifies and returns the instance that was passed in rather
     * than a copy. {@link #storeChildUrls(ResponseData, ResultData)} passes the response's
     * own request data, so that object's URL is changed as a side effect. Confirm this is
     * intended before relying on the request URL afterwards.</p>
     *
     * @param requestData the request to modify
     * @return the same, modified instance
     */
    protected RequestData getDuplicateUrl(final RequestData requestData) {
        final String url = requestData.getUrl();
        if (url.endsWith("/")) {
            requestData.setUrl(url.substring(0, url.length() - 1));
        } else {
            requestData.setUrl(url + "/");
        }
        return requestData;
    }

    /**
     * Creates a parser configured with {@link #featureMap} and {@link #propertyMap}.
     *
     * @return a new parser instance
     * @throws CrawlerSystemException if a feature or property is rejected
     */
    protected DOMParser getDomParser() {
        final DOMParser parser = new DOMParser();
        try {
            for (final Map.Entry<String, String> feature : featureMap.entrySet()) {
                parser.setFeature(feature.getKey(), "true".equalsIgnoreCase(feature.getValue()));
            }
            for (final Map.Entry<String, String> property : propertyMap.entrySet()) {
                parser.setProperty(property.getKey(), property.getValue());
            }
        } catch (final Exception e) {
            throw new CrawlerSystemException("Invalid parser configuration.", e);
        }
        return parser;
    }

    /**
     * Returns the {@code href} of the first node matched by {@code //BASE}. A value
     * starting with {@code www.} is prefixed with {@code http://}.
     *
     * @param document the parsed document
     * @return the base href, or null if absent, blank, or not retrievable
     */
    protected String getBaseHref(final Document document) {
        try {
            final XPathNodes list = getXPathAPI().selectNodeList(document, "//BASE");
            if (list.size() > 0) {
                final Node attrNode = list.get(0).getAttributes().getNamedItem("href");
                if (attrNode != null) {
                    final String attrValue = attrNode.getNodeValue();
                    if (StringUtil.isNotBlank(attrValue)) {
                        return attrValue.startsWith("www.") ? "http://" + attrValue : attrValue;
                    }
                }
            }
        } catch (final Exception e) {
            logger.warn("Could not get a base tag. ", e);
        }
        return null;
    }

    /**
     * Evaluates the XPath expression and collects the resolved, normalized and encoded
     * URLs from the given attribute of every matched node. Values rejected by
     * {@link #isValidPath(String)} are skipped. XPath failures are logged and yield the
     * URLs collected so far.
     *
     * @param url base URL for resolving relative values
     * @param document the parsed document
     * @param xpath expression selecting the nodes
     * @param attr attribute holding the URL
     * @param encoding charset name used for percent-encoding
     * @return the collected URLs; never null
     */
    protected List<String> getUrlFromTagAttribute(final URL url, final Document document, final String xpath, final String attr,
            final String encoding) {
        if (logger.isDebugEnabled()) {
            logger.debug("Base URL: {}", url);
        }
        final List<String> urlList = new ArrayList<>();
        try {
            final XPathNodes list = getXPathAPI().selectNodeList(document, xpath);
            for (int i = 0; i < list.size(); i++) {
                final Node attrNode = list.get(i).getAttributes().getNamedItem(attr);
                if (attrNode == null) {
                    continue;
                }
                final String attrValue = attrNode.getNodeValue();
                if (isValidPath(attrValue)) {
                    addChildUrlFromTagAttribute(urlList, url, attrValue, encoding);
                }
            }
        } catch (final XPathException e) {
            logger.warn("Could not get urls: ({}, {})", xpath, attr, e);
        }
        return urlList;
    }

    /**
     * Resolves one attribute value against the base URL, normalizes and encodes it, and
     * appends it to the list unless the result is blank. Malformed values are logged and
     * skipped.
     *
     * <p>NOTE(review): a value starting with {@code ?} is appended to the full external
     * form of the base URL, including any query or fragment the base already has.
     * Confirm this is the desired resolution for such bases.</p>
     *
     * @param urlList the list receiving the URL
     * @param url the base URL
     * @param attrValue the raw attribute value
     * @param encoding charset name used for percent-encoding
     */
    protected void addChildUrlFromTagAttribute(final List<String> urlList, final URL url, final String attrValue, final String encoding) {
        try {
            final String childUrlValue = attrValue.trim();
            final URL childUrl;
            if (childUrlValue.startsWith("?")) {
                childUrl = new URL(url.toExternalForm() + childUrlValue);
            } else {
                childUrl = new URL(url, childUrlValue);
            }
            final String u = encodeUrl(normalizeUrl(childUrl.toExternalForm()), encoding);
            if (logger.isDebugEnabled()) {
                logger.debug("{} -> {}", attrValue, u);
            }
            if (StringUtil.isNotBlank(u)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Add Child: {}", u);
                }
                urlList.add(u);
            } else if (logger.isDebugEnabled()) {
                logger.debug("Skip Child: {}", u);
            }
        } catch (final MalformedURLException e) {
            logger.warn("Malformed URL: {}", attrValue, e);
        }
    }

    /**
     * Percent-encodes every character of the URL that {@link CharUtil#isUrlChar(char)}
     * rejects, using the given charset. Characters outside the Basic Multilingual Plane
     * are encoded as one unit rather than as two separate surrogates.
     *
     * <p>If the charset name is not supported, UTF-8 is used instead so that characters
     * are never silently dropped from the URL.</p>
     *
     * @param url the URL to encode
     * @param enc the charset name
     * @return the encoded URL, or {@code url} unchanged if either argument is blank
     */
    protected String encodeUrl(final String url, final String enc) {
        if (StringUtil.isBlank(url) || StringUtil.isBlank(enc)) {
            return url;
        }

        final String charsetName;
        if (isSupportedCharset(enc)) {
            charsetName = enc;
        } else {
            logger.warn("Unsupported encoding: {}. Changed to UTF-8", enc);
            charsetName = FALLBACK_CHARSET_NAME;
        }

        final StringBuilder buf = new StringBuilder(url.length() + 100);
        int index = 0;
        while (index < url.length()) {
            final int codePoint = url.codePointAt(index);
            final int width = Character.charCount(codePoint);
            if (width == 1 && CharUtil.isUrlChar((char) codePoint)) {
                buf.append((char) codePoint);
            } else {
                // Encode the whole code point so surrogate pairs are not split.
                final String text = url.substring(index, index + width);
                try {
                    buf.append(URLEncoder.encode(text, charsetName));
                } catch (final UnsupportedEncodingException e) {
                    // Not expected after the check above; keep the text rather than lose it.
                    logger.warn("Could not encode \"{}\" with {}", text, charsetName, e);
                    buf.append(text);
                }
            }
            index += width;
        }
        return buf.toString();
    }

    /**
     * Normalizes a URL string: trims it, removes the fragment, collapses {@code /./},
     * removes the first {@code ;jsessionid=...}, replaces spaces with {@code %20},
     * resolves {@code /segment/../} sequences, and collapses repeated slashes that do
     * not follow a colon.
     *
     * @param u the URL; may be null
     * @return the normalized URL, or null if {@code u} is null
     */
    protected String normalizeUrl(final String u) {
        if (u == null) {
            return null;
        }

        String url = u.trim();

        final int fragmentIndex = url.indexOf('#');
        if (fragmentIndex >= 0) {
            url = url.substring(0, fragmentIndex);
        }

        url = url.replace("/./", "/");

        if (url.contains(";jsessionid")) {
            url = JSESSIONID_PATTERN.matcher(url).replaceFirst("");
        }

        if (url.indexOf(' ') >= 0) {
            url = url.replace(" ", "%20");
        }

        // Collapse one "/segment/../" per pass; stop once a pass changes nothing.
        String oldUrl = null;
        while (url.contains("/../") && !url.equals(oldUrl)) {
            oldUrl = url;
            url = PARENT_SEGMENT_PATTERN.matcher(url).replaceFirst("/");
        }

        return REPEATED_SLASH_PATTERN.matcher(url).replaceAll("$1/");
    }

    /**
     * @param path the raw attribute value
     * @return {@code false} if the value is blank or matches {@link #invalidUrlPattern}
     */
    protected boolean isValidPath(final String path) {
        if (StringUtil.isBlank(path)) {
            return false;
        }
        return !invalidUrlPattern.matcher(path).find();
    }

    /**
     * Registers a parser feature.
     *
     * @param key the feature name
     * @param value {@code "true"} (ignoring case) to enable it; anything else disables it
     * @throws CrawlerSystemException if either argument is blank
     */
    public void addFeature(final String key, final String value) {
        if (StringUtil.isBlank(key) || StringUtil.isBlank(value)) {
            throw new CrawlerSystemException("key or value is null.");
        }
        featureMap.put(key, value);
    }

    /**
     * Registers a parser property.
     *
     * @param key the property name
     * @param value the property value
     * @throws CrawlerSystemException if either argument is blank
     */
    public void addProperty(final String key, final String value) {
        if (StringUtil.isBlank(key) || StringUtil.isBlank(value)) {
            throw new CrawlerSystemException("key or value is null.");
        }
        propertyMap.put(key, value);
    }

    /**
     * Decodes the stored bytes into a string using the stored encoding (UTF-8 when the
     * encoding is null or not supported).
     *
     * @param accessResultData data previously produced by this transformer
     * @return the decoded text, or null if no data is stored
     * @throws CrawlerSystemException if the data was produced by a different transformer
     */
    @Override
    public Object getData(final AccessResultData<?> accessResultData) {
        if (!getName().equals(accessResultData.getTransformerName())) {
            throw new CrawlerSystemException("Transformer is invalid. Use " + accessResultData.getTransformerName()
                    + ". This transformer is " + getName() + ".");
        }

        final byte[] data = accessResultData.getData();
        if (data == null) {
            return null;
        }
        final String encoding = accessResultData.getEncoding();
        try {
            return new String(data, encoding == null ? FALLBACK_CHARSET_NAME : encoding);
        } catch (final UnsupportedEncodingException e) {
            if (logger.isInfoEnabled()) {
                logger.info("Invalid charsetName: {}. Changed to UTF-8", encoding, e);
            }
            return new String(data, Constants.UTF_8_CHARSET);
        }
    }

    public Map<String, String> getFeatureMap() {
        return featureMap;
    }

    public void setFeatureMap(final Map<String, String> featureMap) {
        this.featureMap = featureMap;
    }

    public Map<String, String> getPropertyMap() {
        return propertyMap;
    }

    public void setPropertyMap(final Map<String, String> propertyMap) {
        this.propertyMap = propertyMap;
    }

    public Map<String, String> getChildUrlRuleMap() {
        return childUrlRuleMap;
    }

    public void setChildUrlRuleMap(final Map<String, String> childUrlRuleMap) {
        this.childUrlRuleMap = childUrlRuleMap;
    }

    public String getDefaultEncoding() {
        return defaultEncoding;
    }

    public void setDefaultEncoding(final String defaultEncoding) {
        this.defaultEncoding = defaultEncoding;
    }

    public int getPreloadSizeForCharset() {
        return preloadSizeForCharset;
    }

    public void setPreloadSizeForCharset(final int preloadSizeForCharset) {
        this.preloadSizeForCharset = preloadSizeForCharset;
    }

    public Pattern getInvalidUrlPattern() {
        return invalidUrlPattern;
    }

    public void setInvalidUrlPattern(final Pattern invalidUrlPattern) {
        this.invalidUrlPattern = invalidUrlPattern;
    }
}

