/*
 * Decompiled with CFR 0.152.
 */
package org.monarchinitiative.sgenes.gtf.io.gtf;

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.monarchinitiative.sgenes.gtf.io.gtf.GtfFeature;
import org.monarchinitiative.sgenes.gtf.io.gtf.GtfFrame;
import org.monarchinitiative.sgenes.gtf.io.gtf.GtfParseException;
import org.monarchinitiative.sgenes.gtf.io.gtf.GtfRecord;
import org.monarchinitiative.sgenes.gtf.io.gtf.GtfSource;
import org.monarchinitiative.svart.Contig;
import org.monarchinitiative.svart.CoordinateSystem;
import org.monarchinitiative.svart.Coordinates;
import org.monarchinitiative.svart.GenomicRegion;
import org.monarchinitiative.svart.Strand;
import org.monarchinitiative.svart.assembly.GenomicAssembly;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static utility for turning one tab-separated GTF line into a {@link GtfRecord}.
 *
 * <p>The parser reads the columns at indices 0 (contig name), 1 (source), 2 (feature), 3 (start),
 * 4 (end), 6 (strand), 7 (frame) and 8 (attributes). The column at index 5 is not read.
 *
 * <p>Lines that cannot be parsed are logged at {@code WARN} level and reported as an empty
 * {@link Optional} instead of raising an exception.
 */
public class GtfRecordParser {
    /** Coordinate system used for all coordinates created by this parser. */
    static final CoordinateSystem COORDINATE_SYSTEM = CoordinateSystem.oneBased();
    private static final Logger LOGGER = LoggerFactory.getLogger(GtfRecordParser.class);
    /** Matches a single {@code key value} / {@code key "value"} attribute taken from between two {@code ;}. */
    private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile("\\s*(?<key>[\\w_]+)\\s*\"?(?<value>[\\w\\d\\s_:./()\\-,]*)\"?\\s*");
    /** Finds every {@code tag "value"} occurrence within the attribute column. */
    private static final Pattern TAG_PATTERN = Pattern.compile("tag\\s*\"(?<value>[\\w\\d_:./\\-]+)\"");
    /** Initial capacity of the attribute map. */
    private static final int N_ATTRIBUTE_FIELDS = 26;
    /** Minimum number of tab-separated columns required; the highest index read is 8. */
    private static final int N_REQUIRED_COLUMNS = 9;

    private GtfRecordParser() {
        // static utility class, no instances
    }

    /**
     * Parse a single GTF line.
     *
     * @param line     tab-separated GTF line
     * @param assembly assembly used to look up the contig named in the first column
     * @return the parsed record, or an empty optional if the line has fewer than
     *         {@value #N_REQUIRED_COLUMNS} columns, if any column fails to parse, or if the
     *         {@code gene_id} attribute is missing; the reason is logged as a warning
     */
    public static Optional<GtfRecord> parseLine(String line, GenomicAssembly assembly) {
        String[] token = line.split("\t");
        // Guard against short lines (e.g. headers or truncated records): without this check
        // the column accesses below would fail with an ArrayIndexOutOfBoundsException.
        if (token.length < N_REQUIRED_COLUMNS) {
            LOGGER.warn("Expected at least {} tab-separated columns but found {}. Line {}", N_REQUIRED_COLUMNS, token.length, line);
            return Optional.empty();
        }
        try {
            GenomicRegion location = parseLocation(assembly, token[0], token[6], token[3], token[4]);
            GtfSource source = parseSource(token[1]);
            GtfFeature feature = parseFeature(token[2]);
            GtfFrame frame = parseFrame(token[7]);
            Map<String, List<String>> attributes = parseAttributes(token[8]);
            if (!attributes.containsKey("gene_id")) {
                LOGGER.warn("The mandatory `gene_id` attribute is missing. Line {}", line);
                return Optional.empty();
            }
            Set<String> tags = parseTags(token[8]);
            return Optional.of(GtfRecord.of(location, source, feature, frame, attributes, tags));
        } catch (GtfParseException e) {
            LOGGER.warn("{}. Line {}", e.getMessage(), line);
            return Optional.empty();
        }
    }

    /**
     * Build the genomic region described by the contig, strand, start and end columns.
     *
     * @throws GtfParseException if the contig is unknown to {@code assembly}, the strand is not
     *                           {@code +} or {@code -}, or the coordinates are not integers
     */
    private static GenomicRegion parseLocation(GenomicAssembly assembly, String contigName, String strandValue, String start, String end) throws GtfParseException {
        Contig contig = assembly.contigByName(contigName);
        if (contig.isUnknown()) {
            throw new GtfParseException("Unknown contig `" + contigName + "`");
        }
        Strand strand = parseStrand(strandValue);
        Coordinates coordinates = parseCoordinates(contig, strand, start, end);
        return GenomicRegion.of(contig, strand, coordinates);
    }

    /**
     * Map the frame column ({@code .}, {@code 0}, {@code 1} or {@code 2}) to a {@link GtfFrame}.
     *
     * @throws GtfParseException for any other value
     */
    private static GtfFrame parseFrame(String payload) throws GtfParseException {
        switch (payload) {
            case ".":
                return GtfFrame.NA;
            case "0":
                return GtfFrame.ZERO;
            case "1":
                return GtfFrame.ONE;
            case "2":
                return GtfFrame.TWO;
            default:
                throw new GtfParseException(String.format("Unknown GTF frame: `%s`", payload));
        }
    }

    /**
     * Map the strand column ({@code +} or {@code -}) to a {@link Strand}.
     *
     * @throws GtfParseException for any other value
     */
    private static Strand parseStrand(String strand) throws GtfParseException {
        switch (strand) {
            case "+":
                return Strand.POSITIVE;
            case "-":
                return Strand.NEGATIVE;
            default:
                throw new GtfParseException("Unknown strand `" + strand + "`");
        }
    }

    /**
     * Parse the start and end columns into {@link Coordinates} in {@link #COORDINATE_SYSTEM}.
     *
     * <p>On the positive strand the parsed values are used as they are. Otherwise both positions
     * are inverted with {@code Coordinates.invertCoordinate} and swapped, so that the inverted end
     * becomes the start and the inverted start becomes the end.
     *
     * @throws GtfParseException if {@code startPos} or {@code endPos} is not a valid integer
     */
    private static Coordinates parseCoordinates(Contig contig, Strand strand, String startPos, String endPos) throws GtfParseException {
        try {
            int start = Integer.parseInt(startPos);
            int end = Integer.parseInt(endPos);
            if (strand.isPositive()) {
                return Coordinates.of(COORDINATE_SYSTEM, start, end);
            }
            int startOnNegative = Coordinates.invertCoordinate(COORDINATE_SYSTEM, contig, end);
            int endOnNegative = Coordinates.invertCoordinate(COORDINATE_SYSTEM, contig, start);
            return Coordinates.of(COORDINATE_SYSTEM, startOnNegative, endOnNegative);
        } catch (NumberFormatException e) {
            throw new GtfParseException("Unparsable coordinates: start=" + startPos + ", end=" + endPos);
        }
    }

    /**
     * Map the source column to a {@link GtfSource}. The match is case-sensitive.
     *
     * @throws GtfParseException if the value is not one of the recognized sources
     */
    private static GtfSource parseSource(String payload) throws GtfParseException {
        switch (payload) {
            case "ENSEMBL":
                return GtfSource.ENSEMBL;
            case "HAVANA":
                return GtfSource.HAVANA;
            case "BestRefSeq":
            case "BestRefSeq%2CGnomon":
                // both spellings are reported as BEST_REF_SEQ
                return GtfSource.BEST_REF_SEQ;
            case "Curated Genomic":
                return GtfSource.CURATED_GENOMIC;
            case "Gnomon":
                return GtfSource.GNOMON;
            case "RefSeq":
                return GtfSource.REF_SEQ;
            case "tRNAscan-SE":
                return GtfSource.T_RNA_SCAN_SE;
            default:
                throw new GtfParseException(String.format("Unknown GTF record source: `%s`", payload));
        }
    }

    /**
     * Map the feature column to a {@link GtfFeature}. The match is case-sensitive.
     *
     * @throws GtfParseException if the value is not one of the recognized feature types
     */
    private static GtfFeature parseFeature(String payload) throws GtfParseException {
        switch (payload) {
            case "exon":
                return GtfFeature.EXON;
            case "CDS":
                return GtfFeature.CDS;
            case "UTR":
                return GtfFeature.UTR;
            case "transcript":
                return GtfFeature.TRANSCRIPT;
            case "start_codon":
                return GtfFeature.START_CODON;
            case "stop_codon":
                return GtfFeature.STOP_CODON;
            case "gene":
                return GtfFeature.GENE;
            case "Selenocysteine":
                return GtfFeature.SELENOCYSTEINE;
            default:
                throw new GtfParseException(String.format("Unknown GTF feature type: `%s`", payload));
        }
    }

    /**
     * Split the attribute column on {@code ;} and collect the key/value pairs.
     *
     * <p>Each piece must match {@link #ATTRIBUTE_PATTERN} in full; pieces that do not match are
     * skipped without any message. Values of a key that occurs several times are appended to the
     * same list in the order in which they are encountered.
     *
     * @param payload content of the attribute column
     * @return mutable map from attribute key to the list of its values
     * @throws GtfParseException never thrown by the current implementation; the clause is kept so
     *                           that existing callers continue to compile
     */
    static Map<String, List<String>> parseAttributes(String payload) throws GtfParseException {
        Map<String, List<String>> attributes = new HashMap<>(N_ATTRIBUTE_FIELDS);
        for (String token : payload.split(";")) {
            Matcher matcher = ATTRIBUTE_PATTERN.matcher(token);
            if (!matcher.matches()) {
                continue;
            }
            String key = matcher.group("key");
            String value = matcher.group("value");
            attributes.computeIfAbsent(key, k -> new LinkedList<>()).add(value);
        }
        return attributes;
    }

    /**
     * Collect the values of all {@code tag "value"} occurrences found in the attribute column.
     *
     * @param payload content of the attribute column
     * @return unmodifiable set of distinct tag values, empty if no tag is found
     */
    private static Set<String> parseTags(String payload) {
        Matcher matcher = TAG_PATTERN.matcher(payload);
        Set<String> tags = new HashSet<>();
        while (matcher.find()) {
            tags.add(matcher.group("value"));
        }
        return Set.copyOf(tags);
    }
}

