package org.apache.solr.handler.clustering.carrot2;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.highlight.SolrHighlighter;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.SolrPluginUtils;
import org.carrot2.core.Cluster;
import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
import org.carrot2.util.resource.ClassLoaderLocator;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.IResourceLocator;
import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.class */
public class CarrotClusteringEngine extends SearchClusteringEngine {
    /** Logger shared by this engine and its nested resource locator. */
    private static final Logger log = LoggerFactory.getLogger(CarrotClusteringEngine.class);

    /** Default value for the lexical resources directory when {@link CarrotParams#LEXICAL_RESOURCES_DIR} is not set. */
    private static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2";

    /** Name of the Carrot2 document field in which the Solr document's unique key value is stored. */
    private static final String SOLR_DOCUMENT_ID = "solrId";

    /** Name of the schema's unique key field; assigned in {@code init}. */
    private String idFieldName;

    /** Carrot2 controller obtained from {@link ControllerFactory#createPooling()}. */
    private Controller controller = ControllerFactory.createPooling();

    /** Clustering algorithm class resolved in {@code init} from {@link CarrotParams#ALGORITHM}. */
    private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;

    /** The core handed to {@code init}; its resource loader supplies the class loader used while clustering. */
    private SolrCore core;

    /* loaded from: input_file:org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine$SolrResourceLocator.class */
    private static class SolrResourceLocator implements IResourceLocator {
        private final SolrResourceLoader resourceLoader;
        private final String carrot2ResourcesDir;

        public SolrResourceLocator(SolrCore solrCore, SolrParams solrParams) {
            this.resourceLoader = solrCore.getResourceLoader();
            this.carrot2ResourcesDir = solrParams.get(CarrotParams.LEXICAL_RESOURCES_DIR, CarrotClusteringEngine.CARROT_RESOURCES_PREFIX);
        }

        public IResource[] getAll(String str) {
            final String str2 = this.carrot2ResourcesDir + "/" + str;
            CarrotClusteringEngine.log.debug("Looking for Solr resource: " + str2);
            InputStream inputStream = null;
            try {
                try {
                    inputStream = this.resourceLoader.openResource(str2);
                    final byte[] byteArray = IOUtils.toByteArray(inputStream);
                    if (inputStream != null) {
                        Closeables.closeQuietly(inputStream);
                    }
                    CarrotClusteringEngine.log.info("Loaded Solr resource: " + str2);
                    return new IResource[]{new IResource() { // from class: org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine.SolrResourceLocator.1
                        public InputStream open() throws IOException {
                            return new ByteArrayInputStream(byteArray);
                        }

                        public int hashCode() {
                            return super.hashCode();
                        }

                        public boolean equals(Object obj) {
                            return super.equals(obj);
                        }

                        public String toString() {
                            return "Solr config resource: " + str2;
                        }
                    }};
                } catch (IOException e) {
                    CarrotClusteringEngine.log.warn("Could not read Solr resource " + str2);
                    IResource[] iResourceArr = new IResource[0];
                    if (inputStream != null) {
                        Closeables.closeQuietly(inputStream);
                    }
                    return iResourceArr;
                } catch (RuntimeException e2) {
                    CarrotClusteringEngine.log.debug("Resource not found in Solr's config: " + str2 + ". Using the default " + str + " from Carrot JAR.");
                    IResource[] iResourceArr2 = new IResource[0];
                    if (inputStream != null) {
                        Closeables.closeQuietly(inputStream);
                    }
                    return iResourceArr2;
                }
            } catch (Throwable th) {
                if (inputStream != null) {
                    Closeables.closeQuietly(inputStream);
                }
                throw th;
            }
        }

        public int hashCode() {
            return super.hashCode();
        }

        public boolean equals(Object obj) {
            return super.equals(obj);
        }

        public String toString() {
            String str = "";
            try {
                str = "configDir=" + new File(this.resourceLoader.getConfigDir()).getAbsolutePath() + ", ";
            } catch (Exception e) {
            }
            return "SolrResourceLocator, " + str + "Carrot2 relative lexicalResourcesDir=" + this.carrot2ResourcesDir;
        }
    }

    /**
     * Deprecated entry point working on a {@link DocList}: converts the list into a
     * {@link SolrDocumentList}, recording the document-to-id mapping on the way, and delegates
     * to the {@link SolrDocumentList}-based overload.
     *
     * @throws SolrException with {@code SERVER_ERROR} when the conversion raises an {@link IOException}
     */
    @Override // org.apache.solr.handler.clustering.SearchClusteringEngine
    @Deprecated
    public Object cluster(Query query, DocList docList, SolrQueryRequest solrQueryRequest) {
        final SolrIndexSearcher indexSearcher = solrQueryRequest.getSearcher();
        try {
            // Filled by docListToSolrDocumentList with the id of every converted document.
            final Map<SolrDocument, Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
            final SolrDocumentList solrDocs = SolrPluginUtils.docListToSolrDocumentList(
                    docList, indexSearcher, getFieldsToLoad(solrQueryRequest), docIds);
            return cluster(query, solrDocs, docIds, solrQueryRequest);
        } catch (IOException ioe) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, ioe);
        }
    }

    /**
     * Clusters the given documents with the configured Carrot2 algorithm.
     * The processing attributes consist of the converted documents, the query string, the Solr
     * field names used for clustering, and every request parameter that is not a known Carrot
     * parameter name.
     *
     * @return the clusters converted to a list of {@link NamedList}s
     * @throws SolrException with {@code SERVER_ERROR} wrapping any exception raised on the way
     */
    @Override // org.apache.solr.handler.clustering.SearchClusteringEngine
    public Object cluster(Query query, SolrDocumentList solrDocumentList, Map<SolrDocument, Integer> map, SolrQueryRequest solrQueryRequest) {
        try {
            final Map<String, Object> attributes = new HashMap<String, Object>();
            attributes.put("documents", getDocuments(solrDocumentList, map, query, solrQueryRequest));
            attributes.put("query", query.toString());
            attributes.put("solrFieldNames", getFieldsForClustering(solrQueryRequest));
            extractCarrotAttributes(solrQueryRequest.getParams(), attributes);

            // Run Carrot2 with the core's class loader as the thread context loader,
            // then put the previous loader back no matter how processing ends.
            final Thread thread = Thread.currentThread();
            final ClassLoader previousLoader = thread.getContextClassLoader();
            try {
                thread.setContextClassLoader(this.core.getResourceLoader().getClassLoader());
                final List<Cluster> clusters = this.controller
                        .process(attributes, new Class<?>[]{this.clusteringAlgorithmClass})
                        .getClusters();
                return clustersToNamedList(clusters, solrQueryRequest.getParams());
            } finally {
                thread.setContextClassLoader(previousLoader);
            }
        } catch (Exception failure) {
            log.error("Carrot2 clustering failed", failure);
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", failure);
        }
    }

    /**
     * Initializes the engine: collects Carrot2 init attributes from the configuration,
     * installs the Solr-specific lexical data factory (plus tokenizer and stemmer factories
     * unless the configuration supplies its own), initializes the controller, and resolves
     * the unique key field and the clustering algorithm class.
     *
     * @return whatever {@code super.init} returned
     * @throws SolrException with {@code SERVER_ERROR} when the schema has no unique key field
     */
    @Override // org.apache.solr.handler.clustering.ClusteringEngine
    public String init(NamedList namedList, SolrCore solrCore) {
        this.core = solrCore;
        final String superResult = super.init(namedList, solrCore);
        final SolrParams initParams = SolrParams.toSolrParams(namedList);

        final Map<String, Object> initAttributes = new HashMap<String, Object>();
        extractCarrotAttributes(initParams, initAttributes);

        // Lexical data always comes from the Solr-backed factory; tokenizer and stemmer
        // factories are defaulted only when the configuration did not name them.
        final BasicPreprocessingPipelineDescriptor.AttributeBuilder pipeline =
                BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes);
        pipeline.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
        if (!initAttributes.containsKey("PreprocessingPipeline.tokenizerFactory")) {
            pipeline.tokenizerFactory(LuceneCarrot2TokenizerFactory.class);
        }
        if (!initAttributes.containsKey("PreprocessingPipeline.stemmerFactory")) {
            pipeline.stemmerFactory(LuceneCarrot2StemmerFactory.class);
        }
        initAttributes.put("solrIndexSchema", solrCore.getSchema());

        // Resources are looked up through Solr's resource loader first, then on the core's class loader.
        final IResourceLocator[] locators = new IResourceLocator[]{
                new SolrResourceLocator(solrCore, initParams),
                new ClassLoaderLocator(solrCore.getResourceLoader().getClassLoader())};
        DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(new ResourceLookup(locators));

        // Initialize the controller with the core's class loader as the thread context loader.
        final Thread thread = Thread.currentThread();
        final ClassLoader previousLoader = thread.getContextClassLoader();
        try {
            thread.setContextClassLoader(solrCore.getResourceLoader().getClassLoader());
            this.controller.init(initAttributes);
        } finally {
            thread.setContextClassLoader(previousLoader);
        }

        final SchemaField uniqueKey = solrCore.getSchema().getUniqueKeyField();
        if (uniqueKey == null) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotClusteringEngine.class.getSimpleName() + " requires the schema to have a uniqueKeyField");
        }
        this.idFieldName = uniqueKey.getName();

        final String algorithmClassName = initParams.get(CarrotParams.ALGORITHM);
        this.clusteringAlgorithmClass = solrCore.getResourceLoader().findClass(algorithmClassName, IClusteringAlgorithm.class, new String[0]);
        return superResult;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Collects the names of all document fields this engine reads: the clustering fields,
     * the unique key field, the URL field, the source fields of any custom field mappings,
     * and the language field when one is configured.
     *
     * @param solrQueryRequest request whose parameters name the fields
     * @return a mutable set of field names
     */
    @Override // org.apache.solr.handler.clustering.SearchClusteringEngine
    public Set<String> getFieldsToLoad(SolrQueryRequest solrQueryRequest) {
        final SolrParams requestParams = solrQueryRequest.getParams();

        final Set<String> fields = new HashSet<String>(getFieldsForClustering(solrQueryRequest));
        fields.add(this.idFieldName);
        fields.add(requestParams.get(CarrotParams.URL_FIELD_NAME, "url"));
        fields.addAll(getCustomFieldsMap(requestParams).keySet());

        final String languageField = requestParams.get(CarrotParams.LANGUAGE_FIELD_NAME);
        if (!StringUtils.isBlank(languageField)) {
            fields.add(languageField);
        }
        return fields;
    }

    /**
     * Returns the names of the fields whose content is clustered: the title fields followed by
     * the snippet fields. Both specifications are lists separated by commas and/or spaces; the
     * snippet specification defaults to the title specification.
     *
     * @param solrQueryRequest request whose parameters carry the field specifications
     * @return a mutable set of non-blank field names
     * @throws SolrException with {@code SERVER_ERROR} when the snippet specification is blank
     */
    private Set<String> getFieldsForClustering(SolrQueryRequest solrQueryRequest) {
        SolrParams params = solrQueryRequest.getParams();
        String titleFieldSpec = params.get(CarrotParams.TITLE_FIELD_NAME, "title");
        String snippetFieldSpec = params.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
        if (StringUtils.isBlank(snippetFieldSpec)) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME + " must not be blank.");
        }
        Set<String> fields = Sets.newHashSet();
        addFieldNames(fields, titleFieldSpec);
        addFieldNames(fields, snippetFieldSpec);
        return fields;
    }

    /** Adds every non-blank name from a comma/space separated field specification to {@code target}. */
    private static void addFieldNames(Set<String> target, String fieldSpec) {
        for (String name : fieldSpec.split("[, ]")) {
            // "a, b" splits into "a", "" and "b"; the empty token is not a field name.
            if (StringUtils.isNotBlank(name)) {
                target.add(name);
            }
        }
    }

    /**
     * Converts Solr documents into Carrot2 {@link Document}s.
     * <p>
     * Title and snippet are the concatenated values of the configured fields. When summaries
     * are requested and a highlighter is available, the snippet is instead built from
     * highlighter fragments. A document without a known internal id, or for which the
     * highlighter returns no usable result, falls back to the full field content. The Solr
     * unique key value, an optional language and any custom field mappings are copied onto
     * each Carrot2 document.
     *
     * @param solrDocumentList documents to convert
     * @param map              document to internal id mapping used for highlighting; may be {@code null},
     *                         in which case no summaries are produced
     * @param query            query handed to the highlighter
     * @param solrQueryRequest current request; supplies parameters, core and searcher
     * @return one Carrot2 document per Solr document, in the same order
     * @throws IOException if the highlighter fails
     */
    private List<Document> getDocuments(SolrDocumentList solrDocumentList, Map<SolrDocument, Integer> map, Query query, final SolrQueryRequest solrQueryRequest) throws IOException {
        SolrParams params = solrQueryRequest.getParams();
        SolrCore requestCore = solrQueryRequest.getCore();

        String urlField = params.get(CarrotParams.URL_FIELD_NAME, "url");
        String titleFieldSpec = params.get(CarrotParams.TITLE_FIELD_NAME, "title");
        String snippetFieldSpec = params.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
        String languageField = params.get(CarrotParams.LANGUAGE_FIELD_NAME, (String) null);

        Map<String, String> customFieldsMap = getCustomFieldsMap(params);
        Map<String, String> languageCodeMap = parseLanguageCodeMap(params, languageField);

        boolean produceSummary = params.getBool(CarrotParams.PRODUCE_SUMMARY, false);
        SolrHighlighter highlighter = null;
        SolrQueryRequest highlightRequest = null;
        String[] snippetFields = null;
        if (produceSummary) {
            highlighter = HighlightComponent.getHighlighter(requestCore);
            if (highlighter != null) {
                Map<String, Object> highlightArgs = Maps.newHashMap();
                snippetFields = snippetFieldSpec.split("[, ]");
                highlightArgs.put("hl.fl", snippetFields);
                highlightArgs.put("hl", "true");
                // Empty pre/post markers: only the fragment text itself is wanted.
                highlightArgs.put("hl.simple.pre", "");
                highlightArgs.put("hl.simple.post", "");
                highlightArgs.put("hl.fragsize", Integer.valueOf(params.getInt(CarrotParams.SUMMARY_FRAGSIZE, params.getInt("hl.fragsize", 100))));
                highlightArgs.put("hl.snippets", Integer.valueOf(params.getInt(CarrotParams.SUMMARY_SNIPPETS, params.getInt("hl.snippets", 1))));
                highlightRequest = new LocalSolrQueryRequest(requestCore, query.toString(), "", 0, 1, highlightArgs) {
                    @Override
                    public SolrIndexSearcher getSearcher() {
                        // Highlight against the searcher of the request being served.
                        return solrQueryRequest.getSearcher();
                    }
                };
            } else {
                log.warn("No highlighter configured, cannot produce summary");
                produceSummary = false;
            }
        }

        List<Document> result = new ArrayList<Document>(solrDocumentList.size());
        // Reused one-element holders: the highlighter is invoked for one document at a time.
        float[] scores = {1.0f};
        int[] docIdHolder = new int[1];
        for (SolrDocument solrDoc : solrDocumentList) {
            String snippet = null;
            if (produceSummary && map != null) {
                Integer docId = map.get(solrDoc);
                // A document without a known internal id cannot be highlighted; use its full content.
                if (docId != null) {
                    docIdHolder[0] = docId.intValue();
                    snippet = highlightSummary(highlighter, new DocSlice(0, 1, docIdHolder, scores, 1, 1.0f), query, highlightRequest, snippetFields);
                }
            }
            if (snippet == null) {
                snippet = getConcatenated(solrDoc, snippetFieldSpec);
            }

            Document document = new Document(getConcatenated(solrDoc, titleFieldSpec), snippet, ObjectUtils.toString(solrDoc.getFieldValue(urlField), ""));
            document.setField(SOLR_DOCUMENT_ID, solrDoc.getFieldValue(this.idFieldName));

            if (StringUtils.isNotBlank(languageField)) {
                applyLanguage(document, solrDoc.getFieldValues(languageField), languageCodeMap);
            }

            if (customFieldsMap != null) {
                for (Map.Entry<String, String> mapping : customFieldsMap.entrySet()) {
                    document.setField(mapping.getValue(), solrDoc.getFieldValue(mapping.getKey()));
                }
            }
            result.add(document);
        }
        return result;
    }

    /** Parses the {@code from:to} pairs of the language code map parameter; empty when no language field is configured. */
    private static Map<String, String> parseLanguageCodeMap(SolrParams params, String languageField) {
        Map<String, String> codes = Maps.newHashMap();
        if (StringUtils.isBlank(languageField)) {
            return codes;
        }
        for (String pair : params.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
            if (StringUtils.isBlank(pair)) {
                // Produced by an absent parameter or by ", " separators; not a malformed mapping.
                continue;
            }
            String[] parts = pair.split(":");
            if (parts.length == 2 && StringUtils.isNotBlank(parts[0]) && StringUtils.isNotBlank(parts[1])) {
                codes.put(parts[0], parts[1]);
            } else {
                log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + pair + "'. Skipping this mapping.");
            }
        }
        return codes;
    }

    /** Builds a summary from the highlighter's fragments for a single document; {@code null} when no single result is returned. */
    private static String highlightSummary(SolrHighlighter highlighter, DocList singleDoc, Query query, SolrQueryRequest highlightRequest, String[] snippetFields) throws IOException {
        NamedList<?> highlights = highlighter.doHighlighting(singleDoc, query, highlightRequest, snippetFields);
        if (highlights == null || highlights.size() != 1) {
            return null;
        }
        NamedList<?> fragmentsByField = (NamedList<?>) highlights.getVal(0);
        StringBuilder summary = new StringBuilder();
        for (String field : snippetFields) {
            Object rawFragments = fragmentsByField.get(field);
            String[] fragments = (String[]) rawFragments;
            if (fragments != null) {
                for (String fragment : fragments) {
                    // Fragments are separated by " . " rather than being joined directly.
                    summary.append(fragment);
                    summary.append(" . ");
                }
            }
        }
        return summary.toString();
    }

    /** Sets the document language from the first field value that maps to a known {@link LanguageCode}. */
    private static void applyLanguage(Document document, Collection<?> languageValues, Map<String, String> languageCodeMap) {
        if (languageValues == null) {
            return;
        }
        for (Object value : languageValues) {
            String code = ObjectUtils.toString(value, "");
            if (languageCodeMap.containsKey(code)) {
                code = languageCodeMap.get(code);
            }
            // Codes such as "zh-cn" are rewritten with an underscore before the lookup.
            if (code.indexOf('-') > 0) {
                code = code.replace('-', '_');
            }
            LanguageCode language = LanguageCode.forISOCode(code);
            if (language != null) {
                document.setLanguage(language);
                return;
            }
        }
    }

    /**
     * Reads the custom field definitions from the parameters. Each definition has the form
     * {@code key:value} with both parts non-blank; anything else is logged and skipped.
     *
     * @param solrParams parameters holding zero or more {@link CarrotParams#CUSTOM_FIELD_NAME} values
     * @return a mutable map built from the well-formed definitions; empty when none are given
     */
    private Map<String, String> getCustomFieldsMap(SolrParams solrParams) {
        final Map<String, String> mapping = Maps.newHashMap();
        final String[] definitions = solrParams.getParams(CarrotParams.CUSTOM_FIELD_NAME);
        if (definitions == null) {
            return mapping;
        }
        for (String definition : definitions) {
            final String[] parts = definition.split(":");
            final boolean wellFormed = parts.length == 2
                    && StringUtils.isNotBlank(parts[0])
                    && StringUtils.isNotBlank(parts[1]);
            if (!wellFormed) {
                log.warn("Unsupported format for " + CarrotParams.CUSTOM_FIELD_NAME + ": '" + definition + "'. Skipping this field definition.");
                continue;
            }
            mapping.put(parts[0], parts[1]);
        }
        return mapping;
    }

    /**
     * Joins all values of the listed fields into one string, writing {@code " . "} after
     * every value, and trims the result.
     *
     * @param solrDocument document to read values from
     * @param str          field names separated by commas and/or spaces
     * @return the concatenated, trimmed content; empty when no listed field has values
     */
    private String getConcatenated(SolrDocument solrDocument, String str) {
        final StringBuilder joined = new StringBuilder();
        final String[] fieldNames = str.split("[, ]");
        for (String fieldName : fieldNames) {
            final Collection<?> values = solrDocument.getFieldValues(fieldName);
            if (values == null) {
                continue;
            }
            for (Object value : values) {
                joined.append(ObjectUtils.toString(value));
                joined.append(" . ");
            }
        }
        return joined.toString().trim();
    }

    /**
     * Converts Carrot2 clusters into their response representation, reading the sub-cluster
     * switch (default {@code true}) and the label limit (default unlimited) from the parameters.
     *
     * @param list       clusters to convert
     * @param solrParams request parameters
     * @return one {@link NamedList} per top-level cluster
     */
    private List<NamedList<Object>> clustersToNamedList(List<Cluster> list, SolrParams solrParams) {
        final boolean outputSubClusters = solrParams.getBool(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
        final int maxLabels = solrParams.getInt(CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE);
        final List<NamedList<Object>> converted = Lists.newArrayList();
        clustersToNamedList(list, converted, outputSubClusters, maxLabels);
        return converted;
    }

    /**
     * Appends one entry per cluster to {@code list2}. An entry holds the labels (at most
     * {@code i}), the score when present, an {@code other-topics} flag when set, the Solr ids
     * of the cluster's documents and, when {@code z} is set and sub-clusters exist, the
     * recursively converted sub-clusters.
     *
     * @param list  clusters to convert
     * @param list2 output list receiving the converted entries
     * @param z     whether sub-clusters are emitted; when {@code false}, {@code getAllDocuments()}
     *              is used for the document list instead of {@code getDocuments()}
     * @param i     maximum number of labels per cluster
     */
    private void clustersToNamedList(List<Cluster> list, List<NamedList<Object>> list2, boolean z, int i) {
        for (Cluster current : list) {
            final SimpleOrderedMap<Object> entry = new SimpleOrderedMap<Object>();
            list2.add(entry);

            final List<String> allLabels = current.getPhrases();
            final List<String> labels = allLabels.size() > i ? allLabels.subList(0, i) : allLabels;
            entry.add("labels", labels);

            final Double score = current.getScore();
            if (score != null) {
                entry.add("score", score);
            }

            final boolean otherTopics = current.isOtherTopics();
            if (otherTopics) {
                entry.add("other-topics", Boolean.valueOf(otherTopics));
            }

            final List<Document> members;
            if (z) {
                members = current.getDocuments();
            } else {
                members = current.getAllDocuments();
            }
            final List<Object> memberIds = Lists.newArrayList();
            entry.add("docs", memberIds);
            for (Document member : members) {
                memberIds.add(member.getField(SOLR_DOCUMENT_ID));
            }

            if (!z || current.getSubclusters().isEmpty()) {
                continue;
            }
            final List<NamedList<Object>> children = Lists.newArrayList();
            entry.add("clusters", children);
            clustersToNamedList(current.getSubclusters(), children, z, i);
        }
    }

    /**
     * Copies every parameter whose name is not in {@link CarrotParams#CARROT_PARAM_NAMES}
     * into the attribute map, using the parameter's single string value.
     *
     * @param solrParams parameters to scan
     * @param map        attribute map receiving the copied entries
     */
    private void extractCarrotAttributes(SolrParams solrParams, Map<String, Object> map) {
        for (Iterator<String> names = solrParams.getParameterNamesIterator(); names.hasNext(); ) {
            final String name = names.next();
            if (CarrotParams.CARROT_PARAM_NAMES.contains(name)) {
                continue;
            }
            map.put(name, solrParams.get(name));
        }
    }
}
