/*
 * Decompiled with CFR 0.152.
 */
package org.graylog.shaded.opensearch2.org.apache.lucene.util;

import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.logging.Logger;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import org.graylog.shaded.opensearch2.org.apache.lucene.util.Constants;
import org.graylog.shaded.opensearch2.org.apache.lucene.util.SuppressForbidden;
import org.graylog.shaded.opensearch2.org.apache.lucene.util.VectorUtilProvider;

/**
 * {@link VectorUtilProvider} implementation backed by the incubating {@code jdk.incubator.vector}
 * API.
 *
 * <p>Every kernel follows the same pattern: process as much of the input as possible with vector
 * operations, then finish the remaining elements with a plain scalar loop. The running index
 * {@code i} is shared between the vector part and the scalar tail, which is why it is always
 * initialized to {@code 0} up front.
 *
 * <p>All methods iterate up to {@code a.length} and index {@code b} with the same positions; no
 * length check on {@code b} is performed in this class.
 */
final class VectorUtilPanamaProvider implements VectorUtilProvider {
    /** Bit size of the platform's preferred {@code int} vector species. */
    private static final int INT_SPECIES_PREF_BIT_SIZE = IntVector.SPECIES_PREFERRED.vectorBitSize();
    /** Species used by all {@code float[]} kernels. */
    private static final VectorSpecies<Float> PREF_FLOAT_SPECIES = FloatVector.SPECIES_PREFERRED;
    /** Byte species a quarter as wide as the preferred int species; {@code null} below 256 bits. */
    private static final VectorSpecies<Byte> PREF_BYTE_SPECIES;
    /** Short species half as wide as the preferred int species; {@code null} below 256 bits. */
    private static final VectorSpecies<Short> PREF_SHORT_SPECIES;

    static {
        if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
            // Sized so that one byte vector widens to exactly one short vector and then to exactly
            // one preferred-size int vector (same lane count at every step).
            PREF_BYTE_SPECIES = ByteVector.SPECIES_MAX.withShape(
                    VectorShape.forBitSize(IntVector.SPECIES_PREFERRED.vectorBitSize() >> 2));
            PREF_SHORT_SPECIES = ShortVector.SPECIES_MAX.withShape(
                    VectorShape.forBitSize(IntVector.SPECIES_PREFERRED.vectorBitSize() >> 1));
        } else {
            // The byte kernels use fixed 64/128-bit species in this case and never touch these.
            PREF_BYTE_SPECIES = null;
            PREF_SHORT_SPECIES = null;
        }
    }

    /**
     * Whether the {@code byte[]} kernels may use their vectorized paths. Set once in the
     * constructor: always {@code true} in test mode, otherwise {@code false} only on
     * {@code amd64} when the preferred int vector is narrower than 256 bits.
     */
    private final boolean hasFastIntegerVectors;

    /** Runs {@code action} through {@link AccessController#doPrivileged(PrivilegedAction)}. */
    @SuppressForbidden(reason="security manager")
    private static <T> T doPrivileged(PrivilegedAction<T> action) {
        return AccessController.doPrivileged(action);
    }

    /**
     * Creates the provider and verifies that the vector API is usable.
     *
     * @param testMode when {@code true}, skips the minimum bit size check and always enables the
     *     vectorized integer paths
     * @throws UnsupportedOperationException if the preferred int vector is narrower than 128 bits
     *     (outside test mode), or if loading a float vector fails with a {@link SecurityException}
     */
    VectorUtilPanamaProvider(boolean testMode) {
        if (!testMode && INT_SPECIES_PREF_BIT_SIZE < 128) {
            throw new UnsupportedOperationException("Vector bit size is less than 128: " + INT_SPECIES_PREF_BIT_SIZE);
        }
        try {
            // Load one float vector under doPrivileged so that a SecurityException raised while
            // the vector classes initialize surfaces here rather than in a later kernel call.
            doPrivileged(() -> FloatVector.fromArray(PREF_FLOAT_SPECIES, new float[PREF_FLOAT_SPECIES.length()], 0));
        } catch (SecurityException se) {
            // Same message as before; the original exception is now also kept as the cause.
            throw new UnsupportedOperationException("We hit initialization failure described in JDK-8309727: " + se, se);
        }
        boolean isAMD64withoutAVX2 = Constants.OS_ARCH.equals("amd64") && INT_SPECIES_PREF_BIT_SIZE < 256;
        this.hasFastIntegerVectors = testMode || !isAMD64withoutAVX2;
        Logger log = Logger.getLogger(this.getClass().getName());
        log.info("Java vector incubator API enabled" + (testMode ? " (test mode)" : "") + "; uses preferredBitSize=" + INT_SPECIES_PREF_BIT_SIZE);
    }

    /**
     * Computes the dot product of two float arrays.
     *
     * @param a first vector; its length drives the iteration
     * @param b second vector, read at the same indices as {@code a}
     * @return the sum of {@code a[k] * b[k]} over all {@code k < a.length}
     */
    @Override
    public float dotProduct(float[] a, float[] b) {
        int i = 0;
        float res = 0.0f;
        // Arrays no longer than two vectors are handled entirely by the scalar loop below.
        if (a.length > 2 * PREF_FLOAT_SPECIES.length()) {
            // Four independent accumulators, one per unrolled load pair.
            FloatVector acc1 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector acc2 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector acc3 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector acc4 = FloatVector.zero(PREF_FLOAT_SPECIES);
            // Bound chosen so that all four loads of an unrolled iteration stay inside the array.
            int upperBound = PREF_FLOAT_SPECIES.loopBound(a.length - 3 * PREF_FLOAT_SPECIES.length());
            for (; i < upperBound; i += 4 * PREF_FLOAT_SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i);
                acc1 = acc1.add(va.mul(vb));
                FloatVector vc = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + PREF_FLOAT_SPECIES.length());
                FloatVector vd = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + PREF_FLOAT_SPECIES.length());
                acc2 = acc2.add(vc.mul(vd));
                FloatVector ve = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + 2 * PREF_FLOAT_SPECIES.length());
                FloatVector vf = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + 2 * PREF_FLOAT_SPECIES.length());
                acc3 = acc3.add(ve.mul(vf));
                FloatVector vg = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + 3 * PREF_FLOAT_SPECIES.length());
                FloatVector vh = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + 3 * PREF_FLOAT_SPECIES.length());
                acc4 = acc4.add(vg.mul(vh));
            }
            // Vector tail: remaining whole vectors, one at a time.
            upperBound = PREF_FLOAT_SPECIES.loopBound(a.length);
            for (; i < upperBound; i += PREF_FLOAT_SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i);
                acc1 = acc1.add(va.mul(vb));
            }
            // Combine the accumulators pairwise, then reduce across lanes.
            FloatVector res1 = acc1.add(acc2);
            FloatVector res2 = acc3.add(acc4);
            res += res1.add(res2).reduceLanes(VectorOperators.ADD);
        }
        // Scalar tail (or the whole input when the vector path was skipped).
        for (; i < a.length; i++) {
            res += b[i] * a[i];
        }
        return res;
    }

    /**
     * Computes the cosine similarity of two float arrays.
     *
     * @param a first vector; its length drives the iteration
     * @param b second vector, read at the same indices as {@code a}
     * @return {@code sum(a*b) / sqrt(sum(a*a) * sum(b*b))}, with the division and square root done
     *     in {@code double} and the result narrowed to {@code float}
     */
    @Override
    public float cosine(float[] a, float[] b) {
        int i = 0;
        float sum = 0.0f;
        float norm1 = 0.0f;
        float norm2 = 0.0f;
        // Arrays no longer than two vectors are handled entirely by the scalar loop below.
        if (a.length > 2 * PREF_FLOAT_SPECIES.length()) {
            // Four accumulators each for the dot product and the two squared norms.
            FloatVector sum1 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector sum2 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector sum3 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector sum4 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm1_1 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm1_2 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm1_3 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm1_4 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm2_1 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm2_2 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm2_3 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector norm2_4 = FloatVector.zero(PREF_FLOAT_SPECIES);
            // Bound chosen so that all four loads of an unrolled iteration stay inside the array.
            int upperBound = PREF_FLOAT_SPECIES.loopBound(a.length - 3 * PREF_FLOAT_SPECIES.length());
            for (; i < upperBound; i += 4 * PREF_FLOAT_SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i);
                sum1 = sum1.add(va.mul(vb));
                norm1_1 = norm1_1.add(va.mul(va));
                norm2_1 = norm2_1.add(vb.mul(vb));
                FloatVector vc = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + PREF_FLOAT_SPECIES.length());
                FloatVector vd = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + PREF_FLOAT_SPECIES.length());
                sum2 = sum2.add(vc.mul(vd));
                norm1_2 = norm1_2.add(vc.mul(vc));
                norm2_2 = norm2_2.add(vd.mul(vd));
                FloatVector ve = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + 2 * PREF_FLOAT_SPECIES.length());
                FloatVector vf = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + 2 * PREF_FLOAT_SPECIES.length());
                sum3 = sum3.add(ve.mul(vf));
                norm1_3 = norm1_3.add(ve.mul(ve));
                norm2_3 = norm2_3.add(vf.mul(vf));
                FloatVector vg = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + 3 * PREF_FLOAT_SPECIES.length());
                FloatVector vh = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + 3 * PREF_FLOAT_SPECIES.length());
                sum4 = sum4.add(vg.mul(vh));
                norm1_4 = norm1_4.add(vg.mul(vg));
                norm2_4 = norm2_4.add(vh.mul(vh));
            }
            // Vector tail: remaining whole vectors, one at a time.
            upperBound = PREF_FLOAT_SPECIES.loopBound(a.length);
            for (; i < upperBound; i += PREF_FLOAT_SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i);
                sum1 = sum1.add(va.mul(vb));
                norm1_1 = norm1_1.add(va.mul(va));
                norm2_1 = norm2_1.add(vb.mul(vb));
            }
            // Combine the accumulators pairwise, then reduce across lanes.
            FloatVector sumres1 = sum1.add(sum2);
            FloatVector sumres2 = sum3.add(sum4);
            FloatVector norm1res1 = norm1_1.add(norm1_2);
            FloatVector norm1res2 = norm1_3.add(norm1_4);
            FloatVector norm2res1 = norm2_1.add(norm2_2);
            FloatVector norm2res2 = norm2_3.add(norm2_4);
            sum += sumres1.add(sumres2).reduceLanes(VectorOperators.ADD);
            norm1 += norm1res1.add(norm1res2).reduceLanes(VectorOperators.ADD);
            norm2 += norm2res1.add(norm2res2).reduceLanes(VectorOperators.ADD);
        }
        // Scalar tail (or the whole input when the vector path was skipped).
        for (; i < a.length; i++) {
            float elem1 = a[i];
            float elem2 = b[i];
            sum += elem1 * elem2;
            norm1 += elem1 * elem1;
            norm2 += elem2 * elem2;
        }
        return (float)((double)sum / Math.sqrt((double)norm1 * (double)norm2));
    }

    /**
     * Computes the squared Euclidean distance between two float arrays.
     *
     * @param a first vector; its length drives the iteration
     * @param b second vector, read at the same indices as {@code a}
     * @return the sum of {@code (a[k] - b[k])^2} over all {@code k < a.length}
     */
    @Override
    public float squareDistance(float[] a, float[] b) {
        int i = 0;
        float res = 0.0f;
        // Arrays no longer than two vectors are handled entirely by the scalar loop below.
        if (a.length > 2 * PREF_FLOAT_SPECIES.length()) {
            // Four independent accumulators, one per unrolled load pair.
            FloatVector acc1 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector acc2 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector acc3 = FloatVector.zero(PREF_FLOAT_SPECIES);
            FloatVector acc4 = FloatVector.zero(PREF_FLOAT_SPECIES);
            // Bound chosen so that all four loads of an unrolled iteration stay inside the array.
            int upperBound = PREF_FLOAT_SPECIES.loopBound(a.length - 3 * PREF_FLOAT_SPECIES.length());
            for (; i < upperBound; i += 4 * PREF_FLOAT_SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i);
                FloatVector diff1 = va.sub(vb);
                acc1 = acc1.add(diff1.mul(diff1));
                FloatVector vc = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + PREF_FLOAT_SPECIES.length());
                FloatVector vd = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + PREF_FLOAT_SPECIES.length());
                FloatVector diff2 = vc.sub(vd);
                acc2 = acc2.add(diff2.mul(diff2));
                FloatVector ve = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + 2 * PREF_FLOAT_SPECIES.length());
                FloatVector vf = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + 2 * PREF_FLOAT_SPECIES.length());
                FloatVector diff3 = ve.sub(vf);
                acc3 = acc3.add(diff3.mul(diff3));
                FloatVector vg = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i + 3 * PREF_FLOAT_SPECIES.length());
                FloatVector vh = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i + 3 * PREF_FLOAT_SPECIES.length());
                FloatVector diff4 = vg.sub(vh);
                acc4 = acc4.add(diff4.mul(diff4));
            }
            // Vector tail: remaining whole vectors, one at a time.
            upperBound = PREF_FLOAT_SPECIES.loopBound(a.length);
            for (; i < upperBound; i += PREF_FLOAT_SPECIES.length()) {
                FloatVector va = FloatVector.fromArray(PREF_FLOAT_SPECIES, a, i);
                FloatVector vb = FloatVector.fromArray(PREF_FLOAT_SPECIES, b, i);
                FloatVector diff = va.sub(vb);
                acc1 = acc1.add(diff.mul(diff));
            }
            // Combine the accumulators pairwise, then reduce across lanes.
            FloatVector res1 = acc1.add(acc2);
            FloatVector res2 = acc3.add(acc4);
            res += res1.add(res2).reduceLanes(VectorOperators.ADD);
        }
        // Scalar tail (or the whole input when the vector path was skipped).
        for (; i < a.length; i++) {
            float diff = a[i] - b[i];
            res += diff * diff;
        }
        return res;
    }

    /**
     * Computes the dot product of two byte arrays, accumulating in {@code int}.
     *
     * @param a first vector; its length drives the iteration
     * @param b second vector, read at the same indices as {@code a}
     * @return the sum of {@code a[k] * b[k]} over all {@code k < a.length}
     */
    @Override
    public int dotProduct(byte[] a, byte[] b) {
        int i = 0;
        int res = 0;
        // Inputs shorter than 16 bytes, or platforms flagged as lacking fast integer vectors,
        // fall straight through to the scalar loop.
        if (a.length >= 16 && this.hasFastIntegerVectors) {
            if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
                // Wide path: byte -> short -> int with matching lane counts, single accumulator.
                int upperBound = PREF_BYTE_SPECIES.loopBound(a.length);
                IntVector acc = IntVector.zero(IntVector.SPECIES_PREFERRED);
                for (; i < upperBound; i += PREF_BYTE_SPECIES.length()) {
                    ByteVector va8 = ByteVector.fromArray(PREF_BYTE_SPECIES, a, i);
                    ByteVector vb8 = ByteVector.fromArray(PREF_BYTE_SPECIES, b, i);
                    Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, PREF_SHORT_SPECIES, 0);
                    Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, PREF_SHORT_SPECIES, 0);
                    // A product of two bytes is at most 128 * 128, so it fits in 16 bits.
                    Vector<Short> prod16 = va16.mul(vb16);
                    Vector<Integer> prod32 = prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
                    acc = acc.add(prod32);
                }
                res += acc.reduceLanes(VectorOperators.ADD);
            } else {
                // 128-bit path: load 8 bytes, widen to 8 shorts, then split the 8 products into
                // two 4-lane int vectors (parts 0 and 1) with one accumulator each.
                int upperBound = ByteVector.SPECIES_64.loopBound(a.length);
                IntVector acc1 = IntVector.zero(IntVector.SPECIES_128);
                IntVector acc2 = IntVector.zero(IntVector.SPECIES_128);
                for (; i < upperBound; i += ByteVector.SPECIES_64.length()) {
                    ByteVector va8 = ByteVector.fromArray(ByteVector.SPECIES_64, a, i);
                    ByteVector vb8 = ByteVector.fromArray(ByteVector.SPECIES_64, b, i);
                    Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
                    Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
                    Vector<Short> prod16 = va16.mul(vb16);
                    Vector<Integer> prod32_1 = prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
                    Vector<Integer> prod32_2 = prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
                    acc1 = acc1.add(prod32_1);
                    acc2 = acc2.add(prod32_2);
                }
                res += acc1.add(acc2).reduceLanes(VectorOperators.ADD);
            }
        }
        // Scalar tail (or the whole input when the vector path was skipped).
        for (; i < a.length; i++) {
            res += b[i] * a[i];
        }
        return res;
    }

    /**
     * Computes the cosine similarity of two byte arrays, accumulating in {@code int}.
     *
     * @param a first vector; its length drives the iteration
     * @param b second vector, read at the same indices as {@code a}
     * @return {@code sum(a*b) / sqrt(sum(a*a) * sum(b*b))}, with the division and square root done
     *     in {@code double} and the result narrowed to {@code float}
     */
    @Override
    public float cosine(byte[] a, byte[] b) {
        int i = 0;
        int sum = 0;
        int norm1 = 0;
        int norm2 = 0;
        // Inputs shorter than 16 bytes, or platforms flagged as lacking fast integer vectors,
        // fall straight through to the scalar loop.
        if (a.length >= 16 && this.hasFastIntegerVectors) {
            if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
                // Wide path: byte -> short -> int with matching lane counts.
                int upperBound = PREF_BYTE_SPECIES.loopBound(a.length);
                IntVector accSum = IntVector.zero(IntVector.SPECIES_PREFERRED);
                IntVector accNorm1 = IntVector.zero(IntVector.SPECIES_PREFERRED);
                IntVector accNorm2 = IntVector.zero(IntVector.SPECIES_PREFERRED);
                for (; i < upperBound; i += PREF_BYTE_SPECIES.length()) {
                    ByteVector va8 = ByteVector.fromArray(PREF_BYTE_SPECIES, a, i);
                    ByteVector vb8 = ByteVector.fromArray(PREF_BYTE_SPECIES, b, i);
                    Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, PREF_SHORT_SPECIES, 0);
                    Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, PREF_SHORT_SPECIES, 0);
                    // Products and squares of bytes are at most 128 * 128, so they fit in 16 bits.
                    Vector<Short> prod16 = va16.mul(vb16);
                    Vector<Short> norm1_16 = va16.mul(va16);
                    Vector<Short> norm2_16 = vb16.mul(vb16);
                    Vector<Integer> prod32 = prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
                    Vector<Integer> norm1_32 = norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
                    Vector<Integer> norm2_32 = norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
                    accSum = accSum.add(prod32);
                    accNorm1 = accNorm1.add(norm1_32);
                    accNorm2 = accNorm2.add(norm2_32);
                }
                sum += accSum.reduceLanes(VectorOperators.ADD);
                norm1 += accNorm1.reduceLanes(VectorOperators.ADD);
                norm2 += accNorm2.reduceLanes(VectorOperators.ADD);
            } else {
                // 128-bit path: load 8 bytes, widen to 8 shorts, then split each 8-lane result
                // into two 4-lane int vectors (parts 0 and 1) with one accumulator each.
                int upperBound = ByteVector.SPECIES_64.loopBound(a.length);
                IntVector accSum1 = IntVector.zero(IntVector.SPECIES_128);
                IntVector accSum2 = IntVector.zero(IntVector.SPECIES_128);
                IntVector accNorm1_1 = IntVector.zero(IntVector.SPECIES_128);
                IntVector accNorm1_2 = IntVector.zero(IntVector.SPECIES_128);
                IntVector accNorm2_1 = IntVector.zero(IntVector.SPECIES_128);
                IntVector accNorm2_2 = IntVector.zero(IntVector.SPECIES_128);
                for (; i < upperBound; i += ByteVector.SPECIES_64.length()) {
                    ByteVector va8 = ByteVector.fromArray(ByteVector.SPECIES_64, a, i);
                    ByteVector vb8 = ByteVector.fromArray(ByteVector.SPECIES_64, b, i);
                    Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
                    Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
                    Vector<Short> prod16 = va16.mul(vb16);
                    Vector<Short> norm1_16 = va16.mul(va16);
                    Vector<Short> norm2_16 = vb16.mul(vb16);
                    Vector<Integer> prod32_1 = prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
                    Vector<Integer> prod32_2 = prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
                    Vector<Integer> norm1_32_1 = norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
                    Vector<Integer> norm1_32_2 = norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
                    Vector<Integer> norm2_32_1 = norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
                    Vector<Integer> norm2_32_2 = norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
                    accSum1 = accSum1.add(prod32_1);
                    accSum2 = accSum2.add(prod32_2);
                    accNorm1_1 = accNorm1_1.add(norm1_32_1);
                    accNorm1_2 = accNorm1_2.add(norm1_32_2);
                    accNorm2_1 = accNorm2_1.add(norm2_32_1);
                    accNorm2_2 = accNorm2_2.add(norm2_32_2);
                }
                sum += accSum1.add(accSum2).reduceLanes(VectorOperators.ADD);
                norm1 += accNorm1_1.add(accNorm1_2).reduceLanes(VectorOperators.ADD);
                norm2 += accNorm2_1.add(accNorm2_2).reduceLanes(VectorOperators.ADD);
            }
        }
        // Scalar tail (or the whole input when the vector path was skipped).
        for (; i < a.length; i++) {
            byte elem1 = a[i];
            byte elem2 = b[i];
            sum += elem1 * elem2;
            norm1 += elem1 * elem1;
            norm2 += elem2 * elem2;
        }
        return (float)((double)sum / Math.sqrt((double)norm1 * (double)norm2));
    }

    /**
     * Computes the squared Euclidean distance between two byte arrays, accumulating in
     * {@code int}.
     *
     * @param a first vector; its length drives the iteration
     * @param b second vector, read at the same indices as {@code a}
     * @return the sum of {@code (a[k] - b[k])^2} over all {@code k < a.length}
     */
    @Override
    public int squareDistance(byte[] a, byte[] b) {
        int i = 0;
        int res = 0;
        // Inputs shorter than 16 bytes, or platforms flagged as lacking fast integer vectors,
        // fall straight through to the scalar loop.
        if (a.length >= 16 && this.hasFastIntegerVectors) {
            if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
                // Wide path: byte -> short -> int with matching lane counts, single accumulator.
                int upperBound = PREF_BYTE_SPECIES.loopBound(a.length);
                IntVector acc = IntVector.zero(IntVector.SPECIES_PREFERRED);
                for (; i < upperBound; i += PREF_BYTE_SPECIES.length()) {
                    ByteVector va8 = ByteVector.fromArray(PREF_BYTE_SPECIES, a, i);
                    ByteVector vb8 = ByteVector.fromArray(PREF_BYTE_SPECIES, b, i);
                    Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, PREF_SHORT_SPECIES, 0);
                    Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, PREF_SHORT_SPECIES, 0);
                    Vector<Short> diff16 = va16.sub(vb16);
                    // The difference fits in 16 bits but its square (up to 255^2) does not, so
                    // widen to int before multiplying.
                    Vector<Integer> diff32 = diff16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
                    acc = acc.add(diff32.mul(diff32));
                }
                res += acc.reduceLanes(VectorOperators.ADD);
            } else {
                // 128-bit path: load 8 bytes, widen to 8 shorts, then split the 8 differences
                // into two 4-lane int vectors (parts 0 and 1) before squaring.
                int upperBound = ByteVector.SPECIES_64.loopBound(a.length);
                IntVector acc1 = IntVector.zero(IntVector.SPECIES_128);
                IntVector acc2 = IntVector.zero(IntVector.SPECIES_128);
                for (; i < upperBound; i += ByteVector.SPECIES_64.length()) {
                    ByteVector va8 = ByteVector.fromArray(ByteVector.SPECIES_64, a, i);
                    ByteVector vb8 = ByteVector.fromArray(ByteVector.SPECIES_64, b, i);
                    Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
                    Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
                    Vector<Short> diff16 = va16.sub(vb16);
                    Vector<Integer> diff32_1 = diff16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
                    Vector<Integer> diff32_2 = diff16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
                    acc1 = acc1.add(diff32_1.mul(diff32_1));
                    acc2 = acc2.add(diff32_2.mul(diff32_2));
                }
                res += acc1.add(acc2).reduceLanes(VectorOperators.ADD);
            }
        }
        // Scalar tail (or the whole input when the vector path was skipped).
        for (; i < a.length; i++) {
            int diff = a[i] - b[i];
            res += diff * diff;
        }
        return res;
    }
}

