/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.fess.crawler.extractor.impl;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Writer;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import org.apache.commons.io.output.DeferredFileOutputStream;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.SecureContentHandler;
import org.codelibs.core.beans.util.BeanUtil;
import org.codelibs.core.io.CloseableUtil;
import org.codelibs.core.io.CopyUtil;
import org.codelibs.core.io.PropertiesUtil;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.fess.crawler.entity.ExtractData;
import org.codelibs.fess.crawler.extractor.impl.PasswordBasedExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class TikaExtractor
extends PasswordBasedExtractor {
    /** Logger used by this extractor and by the nested {@code TikaDetectParser}. */
    private static final Logger logger = LoggerFactory.getLogger(TikaExtractor.class);

    /** Parameter key: path of a properties file that is copied onto a {@code TesseractOCRConfig}. */
    public static final String TIKA_TESSERACT_CONFIG = "tika.tesseract.config";

    /** Parameter key: path of a properties file that is copied onto a {@code PDFParserConfig}. */
    public static final String TIKA_PDF_CONFIG = "tika.pdf.config";

    /** Parameter key for text normalization. NOTE(review): its consumer is not visible in this decompiled view. */
    public static final String NORMALIZE_TEXT = "normalize_text";

    /** Metadata key under which {@code createMetadata} stores the password read back by the {@code PasswordProvider}. */
    private static final String FILE_PASSWORD = "fess.file.password";

    // NOTE(review): the fields below without a usage note are only written by their setters in the
    // visible code; presumably they are read by getText/getContent, which failed to decompile.
    protected String outputEncoding = "UTF-8";

    protected boolean readAsTextIfFailed = false;

    /** Passed to {@code SecureContentHandler.setMaximumCompressionRatio} by {@code TikaDetectParser}. */
    protected long maxCompressionRatio = 100L;

    /** Passed to {@code SecureContentHandler.setOutputThreshold} by {@code TikaDetectParser}. */
    protected long maxUncompressionSize = 1000000L;

    protected int initialBufferSize = 10000;

    protected boolean replaceDuplication = false;

    /** Code points: U+0020 SPACE, U+00A0 NO-BREAK SPACE, U+3000 IDEOGRAPHIC SPACE, U+FFFD REPLACEMENT CHARACTER. */
    protected int[] spaceChars = new int[] { 0x20, 0xA0, 0x3000, 0xFFFD };

    /** 1 MiB. */
    protected int memorySize = 1024 * 1024;

    protected int maxAlphanumTermSize = -1;

    protected int maxSymbolTermSize = -1;

    /** Resolved in {@code init()} when not injected through {@code setTikaConfig}. */
    protected TikaConfig tikaConfig;

    /** Cache of Tesseract configurations keyed by the properties path they were loaded from. */
    private final Map<String, TesseractOCRConfig> tesseractOCRConfigMap = new ConcurrentHashMap<>();

    /** Cache of PDF parser configurations keyed by the properties path they were loaded from. */
    private final Map<String, PDFParserConfig> pdfParserConfigMap = new ConcurrentHashMap<>();

    /**
     * Resolves the {@code TikaConfig} used by this extractor.
     *
     * <p>An already assigned configuration is kept. Otherwise the component named
     * {@code "tikaConfig"} is looked up in the crawler container (when one is available), and
     * if that yields nothing {@code TikaConfig.getDefaultConfig()} is used. When debug logging
     * is enabled the media types supported by the configured parser are logged.</p>
     */
    @PostConstruct
    public void init() {
        if (this.tikaConfig == null) {
            if (this.crawlerContainer != null) {
                try {
                    this.tikaConfig = (TikaConfig) this.crawlerContainer.getComponent("tikaConfig");
                } catch (Exception e) {
                    // A missing component is expected; the default configuration is used below.
                    logger.debug("tikaConfig component is not found.", e);
                }
            }
            if (this.tikaConfig == null) {
                this.tikaConfig = TikaConfig.getDefaultConfig();
            }
        }

        if (!logger.isDebugEnabled()) {
            return;
        }
        Parser configuredParser = this.tikaConfig.getParser();
        logger.debug("supportedTypes: {}", configuredParser.getSupportedTypes(new ParseContext()));
    }

    /**
     * Extraction entry point overriding the superclass method.
     *
     * <p>NOTE(review): the CFR decompiler could not reconstruct this method's body (see the
     * error report kept inside the method), so the only statement left is a placeholder that
     * always throws. The real logic must be restored from the original sources or class file
     * before this class can be used. The synthetic {@code lambda$getText$0} ..
     * {@code lambda$getText$3} methods in this class are presumably the bodies of lambdas
     * created by the original implementation - TODO confirm against the original source.</p>
     *
     * @param inputStream the content stream (not read by the placeholder body)
     * @param params extraction parameters (not read by the placeholder body)
     * @return never returns normally in this decompiled form
     * @throws IllegalStateException always, with the message {@code "Decompilation failed"}
     */
    @Override
    public ExtractData getText(InputStream inputStream, Map<String, String> params) {
        /*
         * This method has failed to decompile.  When submitting a bug report, please provide this stack trace, and (if you hold appropriate legal rights) the relevant class file.
         * 
         * org.benf.cfr.reader.util.ConfusedCFRException: Started 2 blocks at once
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op04StructuredStatement.getStartingBlocks(Op04StructuredStatement.java:412)
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op04StructuredStatement.buildNestedBlocks(Op04StructuredStatement.java:487)
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op03SimpleStatement.createInitialStructuredBlock(Op03SimpleStatement.java:736)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysisInner(CodeAnalyser.java:850)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysisOrWrapFail(CodeAnalyser.java:278)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysis(CodeAnalyser.java:201)
         *     at org.benf.cfr.reader.entities.attributes.AttributeCode.analyse(AttributeCode.java:94)
         *     at org.benf.cfr.reader.entities.Method.analyse(Method.java:531)
         *     at org.benf.cfr.reader.entities.ClassFile.analyseMid(ClassFile.java:1055)
         *     at org.benf.cfr.reader.entities.ClassFile.analyseTop(ClassFile.java:942)
         *     at org.benf.cfr.reader.Driver.doJarVersionTypes(Driver.java:257)
         *     at org.benf.cfr.reader.Driver.doJar(Driver.java:139)
         *     at org.benf.cfr.reader.CfrDriverImpl.analyse(CfrDriverImpl.java:76)
         *     at org.benf.cfr.reader.Main.main(Main.java:54)
         */
        throw new IllegalStateException("Decompilation failed");
    }

    /**
     * Builds the {@code ParseContext} for one parse.
     *
     * <p>The context always carries the given parser and a {@code PasswordProvider} that reads
     * the password from the metadata entry {@code fess.file.password}. When {@code params}
     * names a properties file under {@link #TIKA_TESSERACT_CONFIG} or {@link #TIKA_PDF_CONFIG},
     * the corresponding configuration bean is populated from that file, cached by path, and
     * added to the context.</p>
     *
     * @param parser the parser registered in the context under {@code Parser.class}
     * @param params extraction parameters; may be {@code null}
     * @return the populated parse context
     */
    protected ParseContext createParseContext(Parser parser, Map<String, String> params) {
        ParseContext parseContext = new ParseContext();
        parseContext.set(Parser.class, parser);

        String tesseractConfigPath = null;
        if (params != null) {
            tesseractConfigPath = params.get(TIKA_TESSERACT_CONFIG);
        }
        if (StringUtil.isNotBlank(tesseractConfigPath)) {
            TesseractOCRConfig ocrConfig = this.tesseractOCRConfigMap.get(tesseractConfigPath);
            if (ocrConfig == null) {
                Map<String, String> settings = loadTikaConfigProperties(tesseractConfigPath);
                ocrConfig = new TesseractOCRConfig();
                BeanUtil.copyMapToBean(settings, ocrConfig);
                this.tesseractOCRConfigMap.put(tesseractConfigPath, ocrConfig);
            }
            parseContext.set(TesseractOCRConfig.class, ocrConfig);
        }

        String pdfParserConfigPath = null;
        if (params != null) {
            pdfParserConfigPath = params.get(TIKA_PDF_CONFIG);
        }
        if (StringUtil.isNotBlank(pdfParserConfigPath)) {
            PDFParserConfig pdfConfig = this.pdfParserConfigMap.get(pdfParserConfigPath);
            if (pdfConfig == null) {
                Map<String, String> settings = loadTikaConfigProperties(pdfParserConfigPath);
                pdfConfig = new PDFParserConfig();
                BeanUtil.copyMapToBean(settings, pdfConfig);
                this.pdfParserConfigMap.put(pdfParserConfigPath, pdfConfig);
            }
            parseContext.set(PDFParserConfig.class, pdfConfig);
        }

        parseContext.set(PasswordProvider.class, md -> md.get(FILE_PASSWORD));
        return parseContext;
    }

    /** Loads the properties resource at {@code path} and returns its entries as a String-to-String map. */
    private Map<String, String> loadTikaConfigProperties(String path) {
        Properties props = new Properties();
        PropertiesUtil.load(props, path);
        return props.entrySet().stream()
                .collect(Collectors.toMap(entry -> (String) entry.getKey(), entry -> (String) entry.getValue()));
    }

    /**
     * Opens a stream over the content captured by {@code dfos}.
     *
     * @param dfos the deferred output stream to read back
     * @return a {@code ByteArrayInputStream} over the in-memory data when the content is held
     *         in memory, otherwise a buffered stream over the backing file
     * @throws IOException if the backing file cannot be opened
     */
    protected InputStream getContentStream(DeferredFileOutputStream dfos) throws IOException {
        if (!dfos.isInMemory()) {
            final File backingFile = dfos.getFile();
            return new BufferedInputStream(new FileInputStream(backingFile));
        }
        final byte[] inMemory = dfos.getData();
        return new ByteArrayInputStream(inMemory);
    }

    /**
     * Produces the extracted content as a string.
     *
     * <p>NOTE(review): the CFR decompiler could not reconstruct this method's body (see the
     * error report kept inside the method), so the only statement left is a placeholder that
     * always throws. The parameter descriptions below are inferred from names and types only -
     * TODO confirm against the original source.</p>
     *
     * @param out presumably the callback that writes parsed content to a {@code Writer}
     * @param encoding presumably the character encoding of the produced content
     * @param normalizeText presumably whether the produced text is normalized
     * @return never returns normally in this decompiled form
     * @throws TikaException declared by the signature; not thrown by the placeholder body
     * @throws IllegalStateException always, with the message {@code "Decompilation failed"}
     */
    protected String getContent(ContentWriter out, String encoding, boolean normalizeText) throws TikaException {
        /*
         * This method has failed to decompile.  When submitting a bug report, please provide this stack trace, and (if you hold appropriate legal rights) the relevant class file.
         * 
         * org.benf.cfr.reader.util.ConfusedCFRException: Started 2 blocks at once
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op04StructuredStatement.getStartingBlocks(Op04StructuredStatement.java:412)
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op04StructuredStatement.buildNestedBlocks(Op04StructuredStatement.java:487)
         *     at org.benf.cfr.reader.bytecode.analysis.opgraph.Op03SimpleStatement.createInitialStructuredBlock(Op03SimpleStatement.java:736)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysisInner(CodeAnalyser.java:850)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysisOrWrapFail(CodeAnalyser.java:278)
         *     at org.benf.cfr.reader.bytecode.CodeAnalyser.getAnalysis(CodeAnalyser.java:201)
         *     at org.benf.cfr.reader.entities.attributes.AttributeCode.analyse(AttributeCode.java:94)
         *     at org.benf.cfr.reader.entities.Method.analyse(Method.java:531)
         *     at org.benf.cfr.reader.entities.ClassFile.analyseMid(ClassFile.java:1055)
         *     at org.benf.cfr.reader.entities.ClassFile.analyseTop(ClassFile.java:942)
         *     at org.benf.cfr.reader.Driver.doJarVersionTypes(Driver.java:257)
         *     at org.benf.cfr.reader.Driver.doJar(Driver.java:139)
         *     at org.benf.cfr.reader.CfrDriverImpl.analyse(CfrDriverImpl.java:76)
         *     at org.benf.cfr.reader.Main.main(Main.java:54)
         */
        throw new IllegalStateException("Decompilation failed");
    }

    /**
     * Creates the Tika {@code Metadata} describing the resource about to be parsed.
     *
     * <p>{@code resourceName} is stored when it is not empty; {@code contentType} and
     * {@code contentEncoding} are stored when they are not blank. A non-null
     * {@code pdfPassword} is stored under the internal key {@code fess.file.password}, which
     * the {@code PasswordProvider} registered by {@code createParseContext} reads back.</p>
     *
     * @param resourceName name of the resource, or {@code null}/empty to omit it
     * @param contentType content type hint, or blank to omit it
     * @param contentEncoding content encoding hint, or blank to omit it
     * @param pdfPassword password for protected files, or {@code null} when there is none
     * @return the populated metadata
     */
    protected Metadata createMetadata(String resourceName, String contentType, String contentEncoding, String pdfPassword) {
        Metadata metadata = new Metadata();
        if (StringUtil.isNotEmpty(resourceName)) {
            metadata.set("resourceName", resourceName);
        }
        if (StringUtil.isNotBlank(contentType)) {
            metadata.set("Content-Type", contentType);
        }
        if (StringUtil.isNotBlank(contentEncoding)) {
            metadata.set("Content-Encoding", contentEncoding);
        }
        // Log before the password is added: Metadata.toString() prints every name=value pair,
        // so logging afterwards would write the file password into the debug log.
        if (logger.isDebugEnabled()) {
            logger.debug("metadata: {}", metadata);
        }
        if (pdfPassword != null) {
            metadata.add(FILE_PASSWORD, pdfPassword);
        }
        return metadata;
    }

    /**
     * Sets the output encoding (field default: {@code "UTF-8"}).
     *
     * @param outputEncoding the encoding name to store
     */
    public void setOutputEncoding(String outputEncoding) {
        this.outputEncoding = outputEncoding;
    }

    /**
     * Sets the {@code readAsTextIfFailed} flag (field default: {@code false}).
     * NOTE(review): presumably enables a plain-text fallback when parsing fails - the consumer is not visible here.
     *
     * @param readAsTextIfFailed the flag value to store
     */
    public void setReadAsTextIfFailed(boolean readAsTextIfFailed) {
        this.readAsTextIfFailed = readAsTextIfFailed;
    }

    /**
     * Sets the value that {@code TikaDetectParser} passes to
     * {@code SecureContentHandler.setMaximumCompressionRatio}.
     *
     * @param maxCompressionRatio the ratio to store
     */
    public void setMaxCompressionRatio(long maxCompressionRatio) {
        this.maxCompressionRatio = maxCompressionRatio;
    }

    /**
     * Sets the value that {@code TikaDetectParser} passes to
     * {@code SecureContentHandler.setOutputThreshold}.
     *
     * @param maxUncompressionSize the threshold to store
     */
    public void setMaxUncompressionSize(long maxUncompressionSize) {
        this.maxUncompressionSize = maxUncompressionSize;
    }

    /**
     * Sets the initial buffer size (field default: {@code 10000}).
     *
     * @param initialBufferSize the size to store
     */
    public void setInitialBufferSize(int initialBufferSize) {
        this.initialBufferSize = initialBufferSize;
    }

    /**
     * Sets the {@code replaceDuplication} flag (field default: {@code false}).
     *
     * @param replaceDuplication the flag value to store
     */
    public void setReplaceDuplication(boolean replaceDuplication) {
        this.replaceDuplication = replaceDuplication;
    }

    /**
     * Sets the memory size (field default: {@code 0x100000}).
     * NOTE(review): presumably the in-memory threshold before content spills to a file - confirm against getText.
     *
     * @param memorySize the size to store
     */
    public void setMemorySize(int memorySize) {
        this.memorySize = memorySize;
    }

    /**
     * Sets the maximum alphanumeric term size (field default: {@code -1}).
     *
     * @param maxAlphanumTermSize the size to store
     */
    public void setMaxAlphanumTermSize(int maxAlphanumTermSize) {
        this.maxAlphanumTermSize = maxAlphanumTermSize;
    }

    /**
     * Sets the maximum symbol term size (field default: {@code -1}).
     *
     * @param maxSymbolTermSize the size to store
     */
    public void setMaxSymbolTermSize(int maxSymbolTermSize) {
        this.maxSymbolTermSize = maxSymbolTermSize;
    }

    /**
     * Sets the code points treated as space characters. The array reference is stored as given,
     * without copying.
     *
     * @param spaceChars the code points to store
     */
    public void setSpaceChars(int[] spaceChars) {
        this.spaceChars = spaceChars;
    }

    /**
     * Sets the Tika configuration. When this is left {@code null}, {@code init()} resolves one.
     *
     * @param tikaConfig the configuration to store
     */
    public void setTikaConfig(TikaConfig tikaConfig) {
        this.tikaConfig = tikaConfig;
    }

    /**
     * Synthetic lambda body recovered by the decompiler: decodes the content with {@code enc}
     * and writes it to {@code writer} one line at a time (line terminators are not written).
     *
     * <p>In byte-stream mode {@code inputStream} is reset and read; otherwise {@code tempFile}
     * is opened. Any failure is logged at WARN level and swallowed, so this method does not
     * propagate exceptions from reading or writing. The reader (and with it the underlying
     * stream) is always closed.</p>
     *
     * @param isByteStream {@code true} to re-read {@code inputStream}, {@code false} to read {@code tempFile}
     * @param inputStream the resettable source used in byte-stream mode
     * @param enc name of the charset used to decode the content
     * @param tempFile the file read when not in byte-stream mode
     * @param writer destination of the decoded text
     */
    private static /* synthetic */ void lambda$getText$3(boolean isByteStream, InputStream inputStream, String enc, File tempFile, Writer writer) throws IOException, TikaException, SAXException {
        // Tracked separately so the file handle is released even when the reader cannot be
        // constructed (e.g. unsupported charset name), in which case br stays null.
        FileInputStream fileIn = null;
        BufferedReader br = null;
        try {
            final InputStream source;
            if (isByteStream) {
                inputStream.reset();
                source = inputStream;
            } else {
                fileIn = new FileInputStream(tempFile);
                source = fileIn;
            }
            br = new BufferedReader(new InputStreamReader(source, enc));
            String line;
            while ((line = br.readLine()) != null) {
                writer.write(line);
            }
        }
        catch (Exception e) {
            logger.warn("Could not read " + (tempFile != null ? tempFile.getAbsolutePath() : "a byte stream"), e);
        }
        finally {
            CloseableUtil.closeQuietly(br);
            // Closing an already closed FileInputStream has no effect, so this is safe after br.close().
            CloseableUtil.closeQuietly(fileIn);
        }
    }

    /**
     * Synthetic lambda body recovered by the decompiler: parses the content with {@code parser}
     * and streams the body text to {@code writer} through a {@code BodyContentHandler}.
     *
     * <p>In byte-stream mode {@code inputStream} is reset and parsed; otherwise {@code tempFile}
     * is opened and parsed. The stream that was parsed is closed quietly whether or not parsing
     * succeeds, and any failure propagates to the caller.</p>
     */
    private static /* synthetic */ void lambda$getText$2(boolean isByteStream, InputStream inputStream, File tempFile, Parser parser, Metadata metadata3, ParseContext parseContext, Writer writer) throws IOException, TikaException, SAXException {
        InputStream source = null;
        try {
            if (!isByteStream) {
                source = new FileInputStream(tempFile);
            } else {
                inputStream.reset();
                source = inputStream;
            }
            ContentHandler bodyHandler = new BodyContentHandler(writer);
            parser.parse(source, bodyHandler, metadata3, parseContext);
        }
        finally {
            CloseableUtil.closeQuietly(source);
        }
    }

    /**
     * Synthetic lambda body recovered by the decompiler: parses the content with {@code parser}
     * and streams the body text to {@code writer} through a {@code BodyContentHandler}.
     *
     * <p>In byte-stream mode {@code inputStream} is reset and parsed; otherwise {@code tempFile}
     * is opened and parsed. The stream that was parsed is closed quietly whether or not parsing
     * succeeds, and any failure propagates to the caller.</p>
     */
    private static /* synthetic */ void lambda$getText$1(boolean isByteStream, InputStream inputStream, File tempFile, Parser parser, Metadata metadata2, ParseContext parseContext, Writer writer) throws IOException, TikaException, SAXException {
        InputStream source = null;
        try {
            if (!isByteStream) {
                source = new FileInputStream(tempFile);
            } else {
                inputStream.reset();
                source = inputStream;
            }
            ContentHandler bodyHandler = new BodyContentHandler(writer);
            parser.parse(source, bodyHandler, metadata2, parseContext);
        }
        finally {
            CloseableUtil.closeQuietly(source);
        }
    }

    /**
     * Synthetic lambda body recovered by the decompiler: performs the first parse of the content
     * and streams the body text to {@code writer} through a {@code BodyContentHandler}.
     *
     * <p>In byte-stream mode {@code inputStream} is parsed directly. Otherwise its content is
     * first copied into {@code tempFile} and the parse runs on a fresh stream over that file.
     * The stream that was parsed is closed quietly in all cases; failures propagate.</p>
     */
    private static /* synthetic */ void lambda$getText$0(boolean isByteStream, File tempFile, InputStream inputStream, Parser parser, Metadata metadata, ParseContext parseContext, Writer writer) throws IOException, TikaException, SAXException {
        InputStream source = null;
        try {
            if (isByteStream) {
                source = inputStream;
            } else {
                // Spool the incoming stream to disk, then parse from the file.
                try (OutputStream spool = new FileOutputStream(tempFile)) {
                    CopyUtil.copy(inputStream, spool);
                }
                source = new FileInputStream(tempFile);
            }
            ContentHandler bodyHandler = new BodyContentHandler(writer);
            parser.parse(source, bodyHandler, metadata, parseContext);
        }
        finally {
            CloseableUtil.closeQuietly(source);
        }
    }

    protected class TikaDetectParser
    extends CompositeParser {
        private static final long serialVersionUID = 1L;
        private final Detector detector;

        public TikaDetectParser() {
            this(this$0.tikaConfig);
        }

        public TikaDetectParser(TikaConfig config) {
            super(config.getMediaTypeRegistry(), new Parser[]{config.getParser()});
            this.detector = config.getDetector();
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         */
        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
            TemporaryResources tmp = new TemporaryResources();
            try {
                TikaInputStream tis = TikaInputStream.get((InputStream)stream, (TemporaryResources)tmp);
                MediaType type = this.detector.detect((InputStream)tis, metadata);
                metadata.set("Content-Type", type.toString());
                SecureContentHandler sch = new SecureContentHandler(handler, tis);
                sch.setMaximumCompressionRatio(TikaExtractor.this.maxCompressionRatio);
                sch.setOutputThreshold(TikaExtractor.this.maxUncompressionSize);
                if (context.get(EmbeddedDocumentExtractor.class) == null) {
                    Parser p = (Parser)context.get(Parser.class);
                    if (p == null) {
                        context.set(Parser.class, (Object)this);
                    }
                    context.set(EmbeddedDocumentExtractor.class, (Object)new ParsingEmbeddedDocumentExtractor(context));
                }
                if (logger.isDebugEnabled()) {
                    logger.debug("type: {}, metadata: {}, maxCompressionRatio: {}, maxUncompressionSize: {}", new Object[]{type, metadata, TikaExtractor.this.maxCompressionRatio, TikaExtractor.this.maxUncompressionSize});
                }
                try {
                    super.parse((InputStream)tis, (ContentHandler)sch, metadata, context);
                }
                catch (SAXException e) {
                    sch.throwIfCauseOf(e);
                    throw e;
                }
            }
            finally {
                tmp.dispose();
            }
        }

        public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException, SAXException, TikaException {
            ParseContext context = new ParseContext();
            context.set(Parser.class, (Object)this);
            this.parse(stream, handler, metadata, context);
        }
    }

    @FunctionalInterface
    protected static interface ContentWriter {
        public void accept(Writer var1) throws IOException, TikaException, SAXException;
    }
}

