/*
 * Decompiled with CFR 0.152.
 */
package org.forester.msa;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.forester.msa.BasicMsa;
import org.forester.msa.Msa;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;

/**
 * Helper methods for analysing and filtering multiple sequence alignments
 * ({@link Msa}).
 *
 * <p>Most methods are static and stateless. The only instance state is the list
 * of sequence identifiers dropped by the most recent call to
 * {@link #deleteGapColumns(double, int, Msa)}; obtain an instance through
 * {@link #createInstance()} to use that method.
 *
 * <p>Throughout this class the character {@code '-'} is treated as the gap
 * symbol wherever residues are compared directly.
 */
public final class MsaMethods {
    /** Identifiers of the sequences dropped by the last {@code deleteGapColumns} call. */
    private ArrayList<String> _ignored_seqs_ids;

    /** Instances are created through {@link #createInstance()}. */
    private MsaMethods() {
        this.init();
    }

    /**
     * Cloning is not supported.
     *
     * @throws NoSuchMethodError always
     */
    @Override
    public Object clone() {
        throw new NoSuchMethodError();
    }

    /**
     * Builds a new alignment from {@code msa} without the columns whose gap
     * ratio exceeds {@code max_allowed_gap_ratio}.
     *
     * <p>A column is removed when (gap rows / number of sequences) is strictly
     * greater than {@code max_allowed_gap_ratio}. If {@code min_allowed_length}
     * is positive, sequences that keep fewer than {@code min_allowed_length}
     * non-gap residues after column removal are left out of the result and
     * their identifiers are recorded; see {@link #getIgnoredSequenceIds()}.
     * The recorded list is reset at the start of every call.
     *
     * @param max_allowed_gap_ratio maximal gap ratio of a column, in [0, 1]
     * @param min_allowed_length minimal number of non-gap residues a sequence
     *        must retain; values &lt;= 0 disable this filter
     * @param msa the alignment to filter
     * @return the filtered alignment, or {@code null} if no sequence remains
     * @throws IllegalArgumentException if {@code max_allowed_gap_ratio} is NaN
     *         or outside [0, 1]
     */
    public final synchronized Msa deleteGapColumns(double max_allowed_gap_ratio, int min_allowed_length, Msa msa) {
        this.init();
        // NaN compares false against everything, so it must be rejected explicitly.
        if (Double.isNaN(max_allowed_gap_ratio) || max_allowed_gap_ratio < 0.0 || max_allowed_gap_ratio > 1.0) {
            throw new IllegalArgumentException("max allowed gap ration is out of range: " + max_allowed_gap_ratio);
        }
        final int length = msa.getLength();
        final int seq_count = msa.getNumberOfSequences();
        final boolean ignore_too_short_seqs = min_allowed_length > 0;

        // First pass: decide which columns go and how long the result will be.
        final boolean[] delete_cols = new boolean[length];
        int new_length = 0;
        for (int col = 0; col < length; ++col) {
            delete_cols[col] = (double) MsaMethods.calcGapSumPerColumn(msa, col) / (double) seq_count > max_allowed_gap_ratio;
            if (!delete_cols[col]) {
                ++new_length;
            }
        }

        // Second pass: copy the surviving columns row by row.
        final ArrayList<MolecularSequence> seqs = new ArrayList<MolecularSequence>(seq_count);
        for (int row = 0; row < seq_count; ++row) {
            final char[] mol_seq = new char[new_length];
            int new_col = 0;
            int non_gap_cols_sum = 0;
            for (int col = 0; col < length; ++col) {
                if (delete_cols[col]) {
                    continue;
                }
                final char residue = msa.getResidueAt(row, col);
                mol_seq[new_col++] = residue;
                if (residue != '-') {
                    ++non_gap_cols_sum;
                }
            }
            if (ignore_too_short_seqs && non_gap_cols_sum < min_allowed_length) {
                this._ignored_seqs_ids.add(msa.getIdentifier(row).toString());
            } else {
                seqs.add(new BasicSequence(msa.getIdentifier(row), mol_seq, msa.getType()));
            }
        }
        if (seqs.isEmpty()) {
            return null;
        }
        return BasicMsa.createInstance(seqs);
    }

    /**
     * Returns the identifiers of the sequences that the most recent
     * {@link #deleteGapColumns(double, int, Msa)} call dropped for being too
     * short. The returned list is the internal list itself, not a copy.
     *
     * @return the list of ignored sequence identifiers (empty if none)
     */
    public synchronized ArrayList<String> getIgnoredSequenceIds() {
        return this._ignored_seqs_ids;
    }

    /** Replaces the ignored-identifier list with a fresh empty list. */
    private synchronized void init() {
        this._ignored_seqs_ids = new ArrayList<String>();
    }

    /**
     * Collects the per-sequence gap counts of {@code msa} (as computed by
     * {@link #calcNumberOfGaps(MolecularSequence)}) into a statistics object.
     *
     * @param msa the alignment to analyse
     * @return statistics holding one value per sequence
     */
    public static final DescriptiveStatistics calcNumberOfGapsStats(Msa msa) {
        final BasicDescriptiveStatistics stats = new BasicDescriptiveStatistics();
        for (int gap_count : MsaMethods.calcNumberOfGapsInMsa(msa)) {
            stats.addValue(gap_count);
        }
        return stats;
    }

    /**
     * Computes {@link #calcNumberOfGaps(MolecularSequence)} for every sequence
     * of {@code msa}.
     *
     * @param msa the alignment to analyse
     * @return an array with one entry per row, in row order
     */
    public static final int[] calcNumberOfGapsInMsa(Msa msa) {
        final int seq_count = msa.getNumberOfSequences();
        final int[] gaps = new int[seq_count];
        for (int row = 0; row < seq_count; ++row) {
            gaps[row] = MsaMethods.calcNumberOfGaps(msa.getSequence(row));
        }
        return gaps;
    }

    /**
     * Counts the gap stretches in {@code seq}: a run of consecutive gap
     * positions counts once, regardless of its length.
     *
     * @param seq the sequence to scan
     * @return the number of maximal runs of gap positions
     */
    public static final int calcNumberOfGaps(MolecularSequence seq) {
        int gaps = 0;
        boolean in_gap = false;
        for (int i = 0; i < seq.getLength(); ++i) {
            final boolean is_gap = seq.isGapAt(i);
            // Only the first position of a run opens a new gap.
            if (is_gap && !in_gap) {
                ++gaps;
            }
            in_gap = is_gap;
        }
        return gaps;
    }

    /**
     * Collects, for every column of {@code msa}, the fraction of rows that
     * have a gap in that column.
     *
     * @param msa the alignment to analyse
     * @return statistics holding one gap ratio per column
     */
    public static DescriptiveStatistics calcBasicGapinessStatistics(Msa msa) {
        final BasicDescriptiveStatistics stats = new BasicDescriptiveStatistics();
        for (int col = 0; col < msa.getLength(); ++col) {
            stats.addValue((double) MsaMethods.calcGapSumPerColumn(msa, col) / (double) msa.getNumberOfSequences());
        }
        return stats;
    }

    /**
     * Computes the fraction of all alignment cells that hold {@code '-'}.
     *
     * @param msa the alignment to analyse
     * @return gaps / (length * number of sequences); NaN for an alignment
     *         without cells
     */
    public static double calcGapRatio(Msa msa) {
        final int seq_count = msa.getNumberOfSequences();
        final int length = msa.getLength();
        long gaps = 0;
        for (int row = 0; row < seq_count; ++row) {
            for (int col = 0; col < length; ++col) {
                if (msa.getResidueAt(row, col) == '-') {
                    ++gaps;
                }
            }
        }
        // Multiply as doubles: length * seq_count can exceed the int range.
        return (double) gaps / ((double) length * (double) seq_count);
    }

    /**
     * Counts the rows of {@code msa} that have a gap in column {@code col}.
     *
     * @param msa the alignment to analyse
     * @param col the column index
     * @return the number of rows for which {@code msa.isGapAt(row, col)} holds
     */
    public static int calcGapSumPerColumn(Msa msa, int col) {
        int gap_rows = 0;
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            if (msa.isGapAt(row, col)) {
                ++gap_rows;
            }
        }
        return gap_rows;
    }

    /**
     * Averages {@link #calcNormalizedShannonsEntropy(int, Msa, int)} over all
     * columns of {@code msa}.
     *
     * @param k the alphabet size; one of 6, 7, 20 or 21
     * @param msa the alignment to analyse
     * @return the mean per-column normalized entropy
     * @throws IllegalArgumentException if {@code k} is not a supported value
     */
    public static final double calcNormalizedShannonsEntropy(int k, Msa msa) {
        double sum = 0.0;
        for (int col = 0; col < msa.getLength(); ++col) {
            sum += MsaMethods.calcNormalizedShannonsEntropy(k, msa, col);
        }
        return sum / (double) msa.getLength();
    }

    /**
     * Computes a normalized Shannon entropy for one column of {@code msa}.
     *
     * <p>The residue distribution depends on {@code k}:
     * <ul>
     * <li>20: each distinct character is its own class, {@code '-'} is skipped;</li>
     * <li>21: as 20, but {@code '-'} is counted as a class;</li>
     * <li>6: residues are reduced to six classes, all other characters are skipped;</li>
     * <li>7: as 6, but all other characters are counted as a seventh class.</li>
     * </ul>
     * Probabilities are counts divided by the total number of sequences (also
     * when some rows were skipped), and the sum is normalized by
     * {@code ln(min(n, k))} where {@code n} is the number of sequences.
     *
     * @param k the alphabet size; one of 6, 7, 20 or 21
     * @param msa the alignment to analyse
     * @param col the column index
     * @return the normalized entropy; 0.0 if the column has at most one class
     * @throws IllegalArgumentException if {@code k} is not a supported value
     */
    public static final double calcNormalizedShannonsEntropy(int k, Msa msa, int col) {
        final double n = msa.getNumberOfSequences();
        final HashMap<Character, Integer> dist = MsaMethods.calcResidueDistribution(k, msa, col);
        // Zero or one class carries no entropy. Checking "<= 1" (rather than
        // "== 1") also avoids 0 / ln(1) = NaN for a single-row alignment whose
        // only residue was skipped.
        if (dist.size() <= 1) {
            return 0.0;
        }
        double s = 0.0;
        for (int count : dist.values()) {
            final double p = (double) count / n;
            s += p * Math.log(p);
        }
        // With fewer sequences than classes at most n classes can be observed.
        final double base = n < (double) k ? n : (double) k;
        return -(s / Math.log(base));
    }

    /**
     * Collects the effective length (sequence length minus number of gap
     * residues) of every sequence of {@code msa}.
     *
     * @param msa the alignment to analyse
     * @return statistics holding one effective length per sequence
     */
    public static final DescriptiveStatistics calculateEffectiveLengthStatistics(Msa msa) {
        final BasicDescriptiveStatistics stats = new BasicDescriptiveStatistics();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            final MolecularSequence seq = msa.getSequence(row);
            stats.addValue(seq.getLength() - seq.getNumberOfGapResidues());
        }
        return stats;
    }

    /**
     * Collects {@link #calculateIdentityRatio(Msa, int)} for the columns
     * {@code from} through {@code to}, both inclusive.
     *
     * @param from the first column index (inclusive)
     * @param to the last column index (inclusive)
     * @param msa the alignment to analyse
     * @return statistics holding one identity ratio per column in the range
     */
    public static final DescriptiveStatistics calculateIdentityRatio(int from, int to, Msa msa) {
        final BasicDescriptiveStatistics stats = new BasicDescriptiveStatistics();
        for (int col = from; col <= to; ++col) {
            stats.addValue(MsaMethods.calculateIdentityRatio(msa, col));
        }
        return stats;
    }

    /**
     * Computes the share of sequences that carry the most frequent non-gap
     * residue of a column.
     *
     * @param msa the alignment to analyse
     * @param column the column index
     * @return (count of the most frequent non-gap residue) / (number of
     *         sequences); 0 counts are used for an all-gap column
     */
    public static final double calculateIdentityRatio(Msa msa, int column) {
        int majority_count = 0;
        for (int count : MsaMethods.calculateResidueDestributionPerColumn(msa, column).values()) {
            if (count > majority_count) {
                majority_count = count;
            }
        }
        return (double) majority_count / (double) msa.getNumberOfSequences();
    }

    /**
     * Counts how often each residue occurs in a column, skipping {@code '-'}.
     *
     * @param msa the alignment to analyse
     * @param column the column index
     * @return a map from residue to occurrence count, sorted by residue
     */
    public static SortedMap<Character, Integer> calculateResidueDestributionPerColumn(Msa msa, int column) {
        final TreeMap<Character, Integer> map = new TreeMap<Character, Integer>();
        for (Character residue : msa.getColumnAt(column)) {
            if (residue.charValue() != '-') {
                MsaMethods.increment(map, residue);
            }
        }
        return map;
    }

    /**
     * Creates a new instance, needed for the non-static
     * {@link #deleteGapColumns(double, int, Msa)}.
     *
     * @return a fresh instance with an empty ignored-identifier list
     */
    public static synchronized MsaMethods createInstance() {
        return new MsaMethods();
    }

    /**
     * Builds a new alignment from all rows of {@code msa} whose identifier
     * does not equal {@code to_remove_id}.
     *
     * @param msa the source alignment
     * @param to_remove_id the identifier of the sequence(s) to leave out
     * @return the reduced alignment, or {@code null} if no sequence remains
     */
    public static final Msa removeSequence(Msa msa, String to_remove_id) {
        final ArrayList<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            if (!to_remove_id.equals(msa.getIdentifier(row))) {
                seqs.add(msa.getSequence(row));
            }
        }
        if (seqs.isEmpty()) {
            return null;
        }
        return BasicMsa.createInstance(seqs);
    }

    /**
     * Builds a new alignment from all rows of {@code msa} whose identifier is
     * not contained in {@code to_remove_ids}.
     *
     * @param msa the source alignment
     * @param to_remove_ids the identifiers of the sequences to leave out
     * @return the reduced alignment, or {@code null} if no sequence remains
     */
    public static final Msa removeSequences(Msa msa, List<String> to_remove_ids) {
        // Hash the identifiers once so each row lookup is constant time.
        final Set<String> ids = new HashSet<String>(to_remove_ids);
        final ArrayList<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            if (!ids.contains(msa.getIdentifier(row))) {
                seqs.add(msa.getSequence(row));
            }
        }
        if (seqs.isEmpty()) {
            return null;
        }
        return BasicMsa.createInstance(seqs);
    }

    /**
     * Builds a new alignment from the rows of {@code msa} that have at least
     * {@code min_effective_length} characters other than {@code '-'}.
     *
     * @param msa the source alignment
     * @param min_effective_length the minimal number of non-gap residues
     * @return the reduced alignment, or {@code null} if no sequence remains
     */
    public static Msa removeSequencesByMinimalLength(Msa msa, int min_effective_length) {
        final ArrayList<Integer> to_remove_rows = new ArrayList<Integer>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            int eff_length = 0;
            for (int col = 0; col < msa.getLength(); ++col) {
                if (msa.getResidueAt(row, col) != '-') {
                    ++eff_length;
                }
            }
            if (eff_length < min_effective_length) {
                to_remove_rows.add(row);
            }
        }
        return MsaMethods.removeSequencesByRow(msa, to_remove_rows);
    }

    /**
     * Builds a new alignment from all rows of {@code msa} whose index is not
     * contained in {@code to_remove_rows}.
     *
     * @param msa the source alignment
     * @param to_remove_rows the row indices to leave out
     * @return the reduced alignment, or {@code null} if no sequence remains
     */
    public static final Msa removeSequencesByRow(Msa msa, List<Integer> to_remove_rows) {
        // Hash the indices once so each row lookup is constant time.
        final Set<Integer> rows = new HashSet<Integer>(to_remove_rows);
        final ArrayList<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            if (!rows.contains(row)) {
                seqs.add(msa.getSequence(row));
            }
        }
        if (seqs.isEmpty()) {
            return null;
        }
        return BasicMsa.createInstance(seqs);
    }

    /** Selects the column distribution matching alphabet size {@code k}. */
    private static HashMap<Character, Integer> calcResidueDistribution(int k, Msa msa, int col) {
        switch (k) {
            case 6:
                return MsaMethods.calcResidueDistribution6(msa, col);
            case 7:
                return MsaMethods.calcResidueDistribution7(msa, col);
            case 20:
                return MsaMethods.calcResidueDistribution20(msa, col);
            case 21:
                return MsaMethods.calcResidueDistribution21(msa, col);
            default:
                throw new IllegalArgumentException("illegal value for k: " + k);
        }
    }

    /** Counts every character of a column except {@code '-'}. */
    private static final HashMap<Character, Integer> calcResidueDistribution20(Msa msa, int col) {
        final HashMap<Character, Integer> counts = new HashMap<Character, Integer>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            final char c = msa.getResidueAt(row, col);
            if (c != '-') {
                MsaMethods.increment(counts, Character.valueOf(c));
            }
        }
        return counts;
    }

    /** Counts every character of a column, including {@code '-'}. */
    private static final HashMap<Character, Integer> calcResidueDistribution21(Msa msa, int col) {
        final HashMap<Character, Integer> counts = new HashMap<Character, Integer>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            MsaMethods.increment(counts, Character.valueOf(msa.getResidueAt(row, col)));
        }
        return counts;
    }

    /** Counts the six residue classes of a column; unclassified characters are skipped. */
    private static final HashMap<Character, Integer> calcResidueDistribution6(Msa msa, int col) {
        final HashMap<Character, Integer> counts = new HashMap<Character, Integer>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            final char cls = MsaMethods.reduceToResidueClass(msa.getResidueAt(row, col));
            if (cls != '-') {
                MsaMethods.increment(counts, Character.valueOf(cls));
            }
        }
        return counts;
    }

    /** Counts the six residue classes of a column plus {@code '-'} for everything else. */
    private static final HashMap<Character, Integer> calcResidueDistribution7(Msa msa, int col) {
        final HashMap<Character, Integer> counts = new HashMap<Character, Integer>();
        for (int row = 0; row < msa.getNumberOfSequences(); ++row) {
            final char cls = MsaMethods.reduceToResidueClass(msa.getResidueAt(row, col));
            MsaMethods.increment(counts, Character.valueOf(cls));
        }
        return counts;
    }

    /**
     * Maps an upper-case residue letter to its class key in the reduced
     * six-class alphabet, or to {@code '-'} if the character belongs to none.
     * NOTE(review): the groups look like physico-chemical amino acid classes;
     * the key letters themselves are only used as map keys.
     */
    private static char reduceToResidueClass(char c) {
        switch (c) {
            case 'A':
            case 'V':
            case 'L':
            case 'I':
            case 'M':
            case 'C':
                return 'a';
            case 'F':
            case 'W':
            case 'Y':
            case 'H':
                return 'r';
            case 'S':
            case 'T':
            case 'N':
            case 'Q':
                return 'p';
            case 'K':
            case 'R':
                return 'o';
            case 'D':
            case 'E':
                return 'e';
            case 'G':
            case 'P':
                return 's';
            default:
                return '-';
        }
    }

    /** Adds one to the count stored under {@code key}, starting at 1 for a new key. */
    private static void increment(Map<Character, Integer> counts, Character key) {
        final Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }
}

