/*
 * Decompiled with CFR 0.152.
 */
package org.archive.crawler.migrate;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Command-line tool that converts a Heritrix 1.X {@code order.xml} into a
 * Heritrix 3.X job directory.
 *
 * <p>The tool flattens the H1 order file into (pseudo-XPath, value) pairs,
 * looks each pseudo-XPath up in the bundled {@code H1toH3.map} resource, and
 * writes the resulting {@code beanPath=value} override lines into a copy of
 * the bundled {@code migrate-template-crawler-beans.cxml} template. The
 * {@code seeds.txt} next to the source order file is copied alongside.
 */
public class MigrateH1to3Tool {
    /** Classpath location of the H3 crawler-beans template. */
    private static final String TEMPLATE_RESOURCE =
            "/org/archive/crawler/migrate/migrate-template-crawler-beans.cxml";

    /** Classpath location of the H3-bean-path|H1-pseudo-XPath mapping file. */
    private static final String MIGRATE_MAP_RESOURCE =
            "/org/archive/crawler/migrate/H1toH3.map";

    /** Charset used for the map, the template and the generated cxml. */
    private static final String UTF_8 = "UTF-8";

    /** Parsed DOM of the H1 order file; set by {@link #instanceMain(String[])}. */
    protected Document sourceOrderXmlDom;

    /** Shared parser instance, created once in the static initializer. */
    protected static DocumentBuilder DOCUMENT_BUILDER;

    /**
     * Program entry point; delegates to {@link #instanceMain(String[])}.
     *
     * @param args source order.xml path and destination H3 job directory
     * @throws Exception on any I/O, parse or XPath failure
     */
    public static void main(String[] args) throws Exception {
        new MigrateH1to3Tool().instanceMain(args);
    }

    /**
     * Runs the migration.
     *
     * <p>Prints usage and returns when the argument count is not exactly two.
     * Exits the JVM with status 1 when the source order file or its sibling
     * {@code seeds.txt} is not a regular file.
     *
     * @param args {@code args[0]} is the H1 order.xml path, {@code args[1]}
     *             the H3 job directory to create/populate
     * @throws Exception on I/O failure, or if the order file cannot be parsed
     */
    public void instanceMain(String[] args) throws Exception {
        if (args.length != 2) {
            this.printHelp();
            return;
        }
        String sourceOrderXmlFileArg = args[0];
        String destinationH3JobDirArg = args[1];
        File sourceOrderXmlFile = new File(sourceOrderXmlFileArg);
        if (!sourceOrderXmlFile.isFile()) {
            System.err.println("ERROR sourceOrderXmlFileArg is not a file: " + sourceOrderXmlFileArg);
            System.exit(1);
        }
        File destinationH3JobDir = new File(destinationH3JobDirArg);
        org.archive.util.FileUtils.ensureWriteableDirectory(destinationH3JobDir);
        System.out.println("H1 source: " + sourceOrderXmlFile.getAbsolutePath());
        System.out.println("H3 destination: " + destinationH3JobDir.getAbsolutePath());
        System.out.print("Migrating settings...");

        String template = this.loadTemplate();
        Map<String, String> migrateH1toH3Map = this.getMigrateMap();
        try {
            this.sourceOrderXmlDom = DOCUMENT_BUILDER.parse(sourceOrderXmlFile);
        } catch (SAXException e) {
            // Without a parsed document nothing below can work; report and
            // propagate instead of continuing with a null DOM.
            System.err.println("ERROR caught exception parsing input file: " + e.getMessage() + "\n");
            throw e;
        }
        Map<String, String> h1simpleSettings = MigrateH1to3Tool.flattenH1Order(this.sourceOrderXmlDom);

        List<String> notApplicable = new ArrayList<String>();
        List<String> needsAttention = new ArrayList<String>();
        int migrated = 0;
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> setting : h1simpleSettings.entrySet()) {
            String key = setting.getKey();
            String value = setting.getValue();
            String beanPath = migrateH1toH3Map.get(key);
            System.out.print(".");
            if (beanPath == null) {
                // No mapping known for this H1 setting.
                needsAttention.add(key + " " + value);
                continue;
            }
            if (beanPath.startsWith("$")) {
                // '$' prefix: setting is listed in the map as not applicable.
                notApplicable.add(key + " " + value);
                continue;
            }
            if (beanPath.startsWith("*")) {
                // '*' prefix: needs special handling; only the user-agent
                // template has one, everything else is left to the operator.
                if (beanPath.equals("*metadata.userAgentTemplate")) {
                    this.splitH1userAgent(value, sb);
                    migrated += 2;
                    continue;
                }
                needsAttention.add(key + " " + value);
                continue;
            }
            if (beanPath.startsWith("^")) {
                // '^' prefix: value must be upper-cased. Use a fixed locale so
                // the result does not depend on the JVM default (e.g. Turkish).
                value = value.toUpperCase(Locale.ENGLISH);
                beanPath = beanPath.substring(1);
            }
            sb.append(beanPath).append("=").append(value).append("\n");
            ++migrated;
        }
        System.out.println();
        System.out.println();

        String beansCxml = template.replace("###MIGRATE_OVERRIDES###", sb.toString());
        File targetBeansXmlFile = new File(destinationH3JobDir, "crawler-beans.cxml");
        // Explicit charset so non-ASCII setting values survive regardless of
        // the platform default encoding.
        FileUtils.writeStringToFile(targetBeansXmlFile, beansCxml, UTF_8);

        File sourceSeedsTxtFile = new File(sourceOrderXmlFile.getParentFile(), "seeds.txt");
        File destinationSeedsTxtFile = new File(destinationH3JobDir, "seeds.txt");
        if (!sourceSeedsTxtFile.isFile()) {
            System.err.println("ERROR sourceSeedsTxtFile not found: " + sourceSeedsTxtFile);
            System.exit(1);
        }
        FileUtils.copyFile(sourceSeedsTxtFile, destinationSeedsTxtFile);

        System.out.println(notApplicable.size() + " settings skipped as not-applicable");
        System.out.println("These are probably harmless, but if the following settings were");
        System.out.println("important to your crawl process, investigate other options.");
        this.listProblems(notApplicable);
        System.out.println();
        System.out.println(needsAttention.size() + " settings may need attention");
        System.out.println("Please review your original crawl and the created H3 job, for each");
        System.out.println("of the following, and manually update as needed.");
        this.listProblems(needsAttention);
        System.out.println();
        System.out.println(migrated + " H1 settings successfully migrated to H3 configuration");
        System.out.println();
        System.out.println("Review your converted crawler-beans.cxml at:");
        System.out.println(targetBeansXmlFile.getAbsolutePath());
    }

    /**
     * Reads the bundled crawler-beans template into a string.
     *
     * @return the template text
     * @throws IOException if the resource is missing or cannot be read
     */
    private String loadTemplate() throws IOException {
        InputStream inStream = this.getClass().getResourceAsStream(TEMPLATE_RESOURCE);
        if (inStream == null) {
            throw new IOException("template resource not found on classpath: " + TEMPLATE_RESOURCE);
        }
        try {
            return IOUtils.toString(inStream, UTF_8);
        } finally {
            IOUtils.closeQuietly(inStream);
        }
    }

    /**
     * Prints each entry on its own line, indented by one space.
     *
     * @param problems the lines to print
     */
    protected void listProblems(List<String> problems) {
        for (String problem : problems) {
            System.out.println(" " + problem);
        }
    }

    /** Prints the usage message to standard output. */
    protected void printHelp() {
        System.out.println("Usage: takes two arguments. First argument is path to a Heritrix 1.X order.xml, second argument is path for a new Heritrix 3.X job directory. Will generate a basic H3 job with as many of the H1 settings replicated as currently possible.");
    }

    /**
     * Splits an H1 user-agent string into the two H3 override lines
     * {@code metadata.operatorContactUrl} and {@code metadata.userAgentTemplate}
     * and appends them to {@code sb}.
     *
     * <p>The contact URL is the {@code http://} or {@code https://} URL that
     * follows the first {@code +} and runs up to the next {@code )}; in the
     * template it is replaced by {@code @OPERATOR_CONTACT_URL@}. If the
     * user-agent contains no such URL the regex does not match, so the whole
     * string is treated as the URL (unchanged legacy behavior).
     *
     * @param userAgent the H1 user-agent value
     * @param sb        buffer receiving the two override lines
     */
    protected void splitH1userAgent(String userAgent, StringBuilder sb) {
        String originalUrl = userAgent.replaceAll("^.*?\\+(https?://[^)]*).*$", "$1");
        String newTemplate = userAgent.replace(originalUrl, "@OPERATOR_CONTACT_URL@");
        sb.append("metadata.operatorContactUrl=").append(originalUrl).append("\n")
          .append("metadata.userAgentTemplate=").append(newTemplate).append("\n");
    }

    /**
     * Loads the bundled {@code H1toH3.map} resource.
     *
     * <p>Each line has the form {@code h3BeanPath|h1PseudoXPath}; the returned
     * map is keyed by the second field with the first field as value. Lines
     * with fewer than two {@code |}-separated fields (e.g. blank lines) are
     * skipped.
     *
     * @return map from H1 pseudo-XPath to H3 bean path
     * @throws IOException if the resource is missing or cannot be read
     */
    protected Map<String, String> getMigrateMap() throws IOException {
        Map<String, String> map = new HashMap<String, String>();
        InputStream inStream = this.getClass().getResourceAsStream(MIGRATE_MAP_RESOURCE);
        if (inStream == null) {
            throw new IOException("map resource not found on classpath: " + MIGRATE_MAP_RESOURCE);
        }
        try {
            LineIterator iter = IOUtils.lineIterator(inStream, UTF_8);
            while (iter.hasNext()) {
                String[] fields = iter.nextLine().split("\\|");
                if (fields.length < 2) {
                    continue;
                }
                map.put(fields[1], fields[0]);
            }
        } finally {
            IOUtils.closeQuietly(inStream);
        }
        return map;
    }

    /**
     * Flattens an H1 order document into an insertion-ordered map from
     * pseudo-XPath to text value.
     *
     * <p>Every non-blank text node contributes one entry, keyed by the
     * pseudo-XPath of its parent (see {@link #getPseudoXpath(Node)}) with the
     * first {@code /crawl-order} replaced by {@code /}. A later text node with
     * the same key overwrites the earlier value.
     *
     * @param h1order the parsed order.xml
     * @return flattened settings in document order
     * @throws XPathExpressionException if the XPath evaluation fails
     */
    public static Map<String, String> flattenH1Order(Document h1order) throws XPathExpressionException {
        Map<String, String> flattened = new LinkedHashMap<String, String>();
        XPathExpression xpath = XPathFactory.newInstance().newXPath().compile("//text()");
        NodeList nodes = (NodeList) xpath.evaluate(h1order, XPathConstants.NODESET);
        for (int i = 0; i < nodes.getLength(); ++i) {
            Node node = nodes.item(i);
            String text = node.getTextContent();
            if (StringUtils.isBlank(text)) {
                continue;
            }
            String pseudoXPath = MigrateH1to3Tool.getPseudoXpath(node.getParentNode());
            pseudoXPath = pseudoXPath.replaceFirst("/crawl-order", "/");
            flattened.put(pseudoXPath, text);
        }
        return flattened;
    }

    /**
     * Builds a pseudo-XPath for a node by walking up to (but excluding) the
     * topmost ancestor, i.e. the node that has no parent.
     *
     * <p>Each step is the node name, unless the node carries a {@code name}
     * attribute, in which case the step is {@code *[@name="value"]}.
     *
     * @param node the node to describe; {@code null} yields an empty string
     * @return the slash-separated pseudo-XPath
     */
    protected static String getPseudoXpath(Node node) {
        StringBuilder path = new StringBuilder();
        for (Node current = node;
                current != null && current.getParentNode() != null;
                current = current.getParentNode()) {
            String segment = current.getNodeName();
            // Only element nodes have an attribute map; others return null.
            Node nameAttr = current.getAttributes() == null
                    ? null
                    : current.getAttributes().getNamedItem("name");
            if (nameAttr != null) {
                // Spell the attribute out rather than relying on the DOM
                // implementation's Attr.toString() format.
                segment = "*[@" + nameAttr.getNodeName() + "=\"" + nameAttr.getNodeValue() + "\"]";
            }
            path.insert(0, segment).insert(0, "/");
        }
        return path.toString();
    }

    static {
        try {
            DOCUMENT_BUILDER = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            // Fail class initialization loudly rather than leaving a null
            // builder that would surface later as a NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }
}

