/*
 * Decompiled with CFR 0.152.
 */
package org.dromara.pdf.pdfbox.core.ext.extractor;

import java.awt.Rectangle;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import org.dromara.pdf.pdfbox.core.base.Document;
import org.dromara.pdf.pdfbox.core.ext.extractor.AbstractExtractor;

/**
 * Base class for extractors that pull text out of a PDF document.
 *
 * <p>It provides shared helpers for three extraction styles: matching the
 * document text against a regular expression, reading the text of named
 * rectangular regions on a page, and breaking region text into rows and
 * cells. Concrete subclasses implement the public {@code extractBy...}
 * entry points.
 */
public abstract class AbstractTextExtractor extends AbstractExtractor {
    /**
     * Flags used for every regular expression compiled by this class.
     * The combined value is 66, i.e. case-insensitive matching with Unicode-aware case folding.
     */
    private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /**
     * Matches one table row: a non-whitespace character followed by one or more
     * characters that are not line terminators. Leading whitespace is therefore
     * skipped, and a line holding a single character does not match.
     */
    protected static final Pattern TABLE_PATTERN = Pattern.compile("(\\S[^\\n\\r]+)", REGEX_FLAGS);

    /**
     * Creates an extractor for the given document.
     *
     * @param document the document handed to the parent extractor
     */
    public AbstractTextExtractor(Document document) {
        super(document);
    }

    /**
     * Extracts text per page using a regular expression.
     *
     * @param var1 presumably the regular expression (parameter name lost in decompilation; confirm against implementations)
     * @param var2 presumably the zero-based page indexes to process (confirm against implementations)
     * @return a map keyed by an integer (presumably the page index) holding the extracted strings
     */
    public abstract Map<Integer, List<String>> extractByRegex(String var1, int ... var2);

    /**
     * Extracts the text of named regions per page.
     *
     * @param var1 presumably the word separator (parameter name lost in decompilation; confirm against implementations)
     * @param var2 presumably the region name to rectangle mapping (confirm against implementations)
     * @param var3 presumably the zero-based page indexes to process (confirm against implementations)
     * @return a map keyed by an integer (presumably the page index) holding region name to text
     */
    public abstract Map<Integer, Map<String, String>> extractByRegionArea(String var1, Map<String, Rectangle> var2, int ... var3);

    /**
     * Extracts the text of named regions per page as rows of cells.
     *
     * @param var1 presumably the word separator (parameter name lost in decompilation; confirm against implementations)
     * @param var2 presumably the region name to rectangle mapping (confirm against implementations)
     * @param var3 presumably the zero-based page indexes to process (confirm against implementations)
     * @return a map keyed by an integer (presumably the page index) holding region name to rows of cells
     */
    public abstract Map<Integer, Map<String, List<List<String>>>> extractByTable(String var1, Map<String, Rectangle> var2, int ... var3);

    /**
     * Reads the text the stripper produces for this extractor's document and
     * returns the parts of it that match {@code regex}.
     *
     * @param regex    the regular expression; when {@code null} or blank the whole text is returned as a single element
     * @param stripper the stripper used to obtain the text
     * @return every match in order of appearance, or a one-element list with the full text when no regex is given
     */
    protected List<String> processTextByRegex(String regex, PDFTextStripper stripper) {
        List<String> matches = new ArrayList<>(32);
        // NOTE(review): stock PDFBox declares IOException on getText; confirm how the
        // original source dealt with it, since no handling survives in this decompiled form.
        String text = stripper.getText(this.getDocument());
        if (Objects.isNull(regex) || regex.trim().isEmpty()) {
            matches.add(text);
            return matches;
        }
        Matcher matcher = Pattern.compile(regex, REGEX_FLAGS).matcher(text);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /**
     * Extracts the text found inside each named region of a page.
     *
     * @param regionArea    region name to rectangle; {@code null} or empty yields an empty result
     * @param wordSeparator the separator the stripper places between words
     * @param page          the page to read
     * @return a mutable map from region name to the text extracted for that region
     */
    protected Map<String, String> processTextByRegionArea(Map<String, Rectangle> regionArea, String wordSeparator, PDPage page) {
        // Nothing to extract: skip building a stripper altogether.
        if (Objects.isNull(regionArea) || regionArea.isEmpty()) {
            return new HashMap<>(0);
        }
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.setWordSeparator(wordSeparator);
        regionArea.forEach((name, rectangle) -> stripper.addRegion(name, rectangle));
        // NOTE(review): stock PDFBox declares IOException on extractRegions; confirm how the
        // original source dealt with it, since no handling survives in this decompiled form.
        stripper.extractRegions(page);
        Map<String, String> data = new HashMap<>(regionArea.size());
        for (String region : regionArea.keySet()) {
            data.put(region, stripper.getTextForRegion(region));
        }
        return data;
    }

    /**
     * Extracts the text of each named region and breaks it into rows and cells.
     *
     * <p>Rows are the matches of {@link #TABLE_PATTERN} in the region text; each
     * row is then split into cells on {@code wordSeparator}.
     *
     * @param regionArea    region name to rectangle
     * @param wordSeparator the separator placed between words by the stripper and used to split rows into cells
     * @param page          the page to read
     * @return a mutable map from region name to its rows, each row being a list of cell strings
     */
    protected Map<String, List<List<String>>> processTextByTable(Map<String, Rectangle> regionArea, String wordSeparator, PDPage page) {
        Map<String, String> sourceMap = this.processTextByRegionArea(regionArea, wordSeparator, page);
        Map<String, List<List<String>>> dataMap = new HashMap<>(sourceMap.size());
        if (sourceMap.isEmpty()) {
            return dataMap;
        }
        // The separator was given to the stripper as literal text, so it must be split on
        // literally as well; unquoted, a separator such as "|" or "." would act as a regex.
        Pattern cellSeparator = Pattern.compile(Pattern.quote(wordSeparator));
        sourceMap.forEach((region, text) -> {
            List<List<String>> rows = new ArrayList<>(16);
            Matcher matcher = TABLE_PATTERN.matcher(text);
            while (matcher.find()) {
                rows.add(Arrays.stream(cellSeparator.split(matcher.group())).collect(Collectors.toList()));
            }
            dataMap.put(region, rows);
        });
        return dataMap;
    }

    /**
     * Applies {@code function} to the requested pages and collects the results by page index.
     *
     * <p>When {@code pageIndexes} is non-null and non-empty, only those pages are
     * processed; any exception raised while fetching or processing one of them is
     * logged as a warning and that index is left out of the result. Otherwise every
     * page is processed in iteration order, keyed from 0 upwards, and exceptions
     * are not caught.
     *
     * @param function      the per-page extraction to run
     * @param wordSeparator passed through to {@code function}
     * @param regionArea    passed through to {@code function}
     * @param pageIndexes   the page indexes to process; {@code null} or empty means all pages
     * @param <R>           the per-page result type
     * @return a mutable map from page index to the result for that page
     */
    protected <R> Map<Integer, R> extractText(Function<R> function, String wordSeparator, Map<String, Rectangle> regionArea, int ... pageIndexes) {
        Map<Integer, R> data = new HashMap<>(32);
        PDPageTree pageTree = this.getDocument().getPages();
        if (Objects.nonNull(pageIndexes) && pageIndexes.length > 0) {
            for (int index : pageIndexes) {
                try {
                    data.put(index, function.apply(regionArea, wordSeparator, pageTree.get(index)));
                }
                catch (Exception e) {
                    // NOTE(review): this also hides failures raised inside function.apply and
                    // drops the cause; consider logging e once the logger's API is confirmed.
                    this.log.warn((Object)("the index['" + index + "'] is invalid, will be ignored"));
                }
            }
        } else {
            int index = 0;
            for (PDPage page : pageTree) {
                data.put(index, function.apply(regionArea, wordSeparator, page));
                ++index;
            }
        }
        return data;
    }

    /**
     * Per-page extraction callback used by {@code extractText}.
     *
     * @param <R> the result produced for one page
     */
    @FunctionalInterface
    protected interface Function<R> {
        /**
         * Produces the result for a single page.
         *
         * @param var1 the region name to rectangle mapping, as passed by {@code extractText}
         * @param var2 the word separator, as passed by {@code extractText}
         * @param var3 the page to process
         * @return the extraction result for the page
         */
        R apply(Map<String, Rectangle> var1, String var2, PDPage var3);
    }
}

