/*
 * Decompiled with CFR 0.152.
 */
package org.graylog.shaded.opensearch2.org.apache.lucene.analysis.synonym.word2vec;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Locale;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.graylog.shaded.opensearch2.org.apache.lucene.analysis.synonym.word2vec.Word2VecModel;
import org.graylog.shaded.opensearch2.org.apache.lucene.util.BytesRef;
import org.graylog.shaded.opensearch2.org.apache.lucene.util.TermAndVector;

/**
 * Reads a word2vec model out of a zip stream.
 *
 * <p>The zip is scanned for the first entry whose name starts with {@code syn0}. That entry is
 * parsed as UTF-8 text: a header line holding the dictionary size and the vector dimension
 * separated by a space, followed by one line per term of the form
 * {@code term v1 v2 ... vN} (space separated). Terms may be Base64 encoded, in which case they
 * carry a {@code b64} prefix followed by one separator character; whether terms are encoded is
 * decided once, from the first term line, and applied to every following line.
 */
public class Dl4jModelReader
implements Closeable {
    private static final String MODEL_FILE_NAME_PREFIX = "syn0";
    /** Case-insensitive marker at the start of a Base64 encoded term. */
    private static final String B64_TERM_PREFIX = "b64";
    private final ZipInputStream word2VecModelZipFile;

    /**
     * Wraps the given stream in a buffered {@link ZipInputStream}.
     *
     * @param stream raw bytes of the zip archive containing the model
     */
    public Dl4jModelReader(InputStream stream) {
        this.word2VecModelZipFile = new ZipInputStream(new BufferedInputStream(stream));
    }

    /**
     * Parses the first {@code syn0*} entry of the zip into a {@link Word2VecModel}.
     *
     * @return the model populated with every term/vector pair found in the entry
     * @throws IOException if reading from the underlying zip stream fails
     * @throws IllegalArgumentException if no {@code syn0*} entry exists, or if the entry is empty
     *     or its header does not hold two integer values
     * @throws RuntimeException if a term line does not carry exactly the declared number of
     *     vector components
     */
    public Word2VecModel read() throws IOException {
        ZipEntry entry;
        while ((entry = this.word2VecModelZipFile.getNextEntry()) != null) {
            String fileName = entry.getName();
            if (!fileName.startsWith(MODEL_FILE_NAME_PREFIX)) {
                continue;
            }
            // Deliberately not closed here: closing the reader would close the zip stream,
            // whose lifetime is managed by close().
            BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)this.word2VecModelZipFile, StandardCharsets.UTF_8));
            String header = reader.readLine();
            if (header == null) {
                // readLine() returns null on an empty entry; fail with a clear message
                // instead of a bare NullPointerException.
                throw new IllegalArgumentException("Cannot read Dl4j word2vec model - '" + fileName + "' is empty, expected a header line with the dictionary size and the vector dimension.");
            }
            String[] headerValues = header.split(" ");
            if (headerValues.length < 2) {
                throw new IllegalArgumentException("Cannot read Dl4j word2vec model - malformed header '" + header + "' in '" + fileName + "', expected the dictionary size and the vector dimension separated by a space.");
            }
            int dictionarySize = Integer.parseInt(headerValues[0]);
            int vectorDimension = Integer.parseInt(headerValues[1]);
            Word2VecModel model = new Word2VecModel(dictionarySize, vectorDimension);

            // The first term line decides whether all terms are Base64 encoded.
            String line = reader.readLine();
            boolean isTermB64Encoded = false;
            if (line != null) {
                String[] tokens = line.split(" ");
                isTermB64Encoded = Dl4jModelReader.hasB64Prefix(tokens[0]);
                model.addTermAndVector(Dl4jModelReader.extractTermAndVector(tokens, vectorDimension, isTermB64Encoded));
            }
            while ((line = reader.readLine()) != null) {
                String[] tokens = line.split(" ");
                model.addTermAndVector(Dl4jModelReader.extractTermAndVector(tokens, vectorDimension, isTermB64Encoded));
            }
            return model;
        }
        throw new IllegalArgumentException("Cannot read Dl4j word2vec model - 'syn0' file is missing in the zip. 'syn0' is a mandatory file containing the mapping between terms and vectors generated by the DL4j library.");
    }

    /**
     * Tells whether a term starts with the {@code b64} marker, ignoring case.
     *
     * <p>Uses {@link String#regionMatches(boolean, int, String, int, int)} so that terms shorter
     * than the marker simply yield {@code false} rather than failing on an out-of-range
     * substring.
     */
    private static boolean hasB64Prefix(String term) {
        return term.regionMatches(true, 0, B64_TERM_PREFIX, 0, B64_TERM_PREFIX.length());
    }

    /**
     * Converts one tokenized line into a term and its vector.
     *
     * @param tokens the term at index 0 followed by the vector components
     * @param vectorDimension number of components declared in the header
     * @param isTermB64Encoded whether {@code tokens[0]} must be Base64 decoded
     * @throws RuntimeException if the number of components differs from {@code vectorDimension}
     */
    private static TermAndVector extractTermAndVector(String[] tokens, int vectorDimension, boolean isTermB64Encoded) {
        BytesRef term = isTermB64Encoded ? Dl4jModelReader.decodeB64Term(tokens[0]) : new BytesRef(tokens[0]);
        float[] vector = new float[tokens.length - 1];
        if (vectorDimension != vector.length) {
            throw new RuntimeException(String.format(Locale.ROOT, "Word2Vec model file corrupted. Declared vectors of size %d but found vector of size %d for word %s (%s)", vectorDimension, vector.length, tokens[0], term.utf8ToString()));
        }
        for (int i = 1; i < tokens.length; ++i) {
            vector[i - 1] = Float.parseFloat(tokens[i]);
        }
        return new TermAndVector(term, vector);
    }

    /**
     * Decodes a Base64 encoded term.
     *
     * <p>The first four characters (the {@code b64} marker plus one separator character) are
     * skipped and the remainder is Base64 decoded.
     *
     * @param term the encoded term, including its four character prefix
     * @return the decoded bytes wrapped in a {@link BytesRef}
     */
    static BytesRef decodeB64Term(String term) {
        byte[] buffer = Base64.getDecoder().decode(term.substring(4));
        return new BytesRef(buffer, 0, buffer.length);
    }

    /** Closes the underlying zip stream. */
    @Override
    public void close() throws IOException {
        this.word2VecModelZipFile.close();
    }
}

