/*
 * Decompiled with CFR 0.152.
 */
package Ace2;

import Ace2.SAMUtils;
import Ace2.SoftClipTrack;
import Ace2.WorkingFile;
import Funk.Str;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Scans the records of a SAM/BAM file and writes reads carrying a sufficiently
 * long soft-clipped region to a FASTA-style report named
 * {@code <input file name>_softclip.fa}.
 *
 * <p>Each reported read gets a header line of the form
 * {@code >readName key=value|key=value|...} followed by a line with its bases.
 * Clip sites (alignment boundaries with at least
 * {@code MINIMUM_CLIPPED_TRACK_LENGTH} clipped bases) are counted across reads,
 * and the per-site totals are attached to each reported read.
 *
 * <p>NOTE(review): the position-based flushing presumes records arrive in
 * ascending position within each reference (coordinate-sorted input) - confirm
 * against callers.
 */
public class SAMExtractSoftClipped {
    private File sam_file;
    private String report_name;
    /** Minimum length of a usable soft clip for its read to be written to the report. */
    private static int MINIMUM_CLIPPED_LENGTH = 18;
    /** Longest soft-clip length that gets its own slot in the length histogram. */
    private static int MAX_CLIP_LENGTH_TRACK = 100;
    /** Minimum clipped-vs-unclipped distance for an alignment boundary to count as a clip site. */
    private static int MINIMUM_CLIPPED_TRACK_LENGTH = 10;
    /** How far (in reference positions) behind the current read queued data must lie before it is flushed. */
    private static final int FLUSH_BOUNDARY = 250;
    /** When true, soft clips whose mean base quality is too low are ignored. */
    private boolean FILTER_LOW_QUALITY = true;
    /** Mean base quality below which a soft-clipped region is rejected. */
    private int MINIMUM_MEAN_CLIPPED_REGION_QUALITY = 15;

    /**
     * @param sam_file input file to scan; the report name is derived from its file name
     */
    public SAMExtractSoftClipped(File sam_file) {
        this.sam_file = sam_file;
        this.report_name = sam_file.getName() + "_softclip.fa";
    }

    /**
     * Reads every record of the input file, writes the soft-clipped reads to the
     * report file and prints summary statistics to standard error.
     *
     * @throws FileNotFoundException declared for callers; raised by the underlying file handling
     * @throws IOException if closing the reader (or the underlying file handling) fails
     */
    public void report() throws FileNotFoundException, IOException {
        SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(this.sam_file);
        try {
            WorkingFile wf = new WorkingFile(this.report_name);
            PrintStream ps = wf.getPrintStream();
            long read_count = 0L;
            long unmapped_count = 0L;
            long duplicate_count = 0L;
            long extracted_count = 0L;
            long rejected_lq_clips = 0L;
            // histogram of usable soft-clip lengths; a new long[] is already zero-filled
            long[] clip_lengths = new long[MAX_CLIP_LENGTH_TRACK + 1];
            int flush_interval = 100000;
            HashMap<Integer, Integer> total_clip_counts = new HashMap<Integer, Integer>();
            // insertion-ordered so the report lists reads in the order they were read
            HashSet<SoftClipTrack> queue = new LinkedHashSet<SoftClipTrack>();
            Integer last_ri = null;
            for (SAMRecord sr : reader) {
                if (++read_count % 1000000L == 0L) {
                    System.err.println("read " + read_count);
                }
                Integer this_ri = sr.getReferenceIndex();
                if (last_ri == null || !this_ri.equals(last_ri)) {
                    // reference changed: positions are no longer comparable, so flush everything
                    this.queue_flush_check(queue, total_clip_counts, ps, 0, true);
                    last_ri = this_ri;
                } else if (read_count % (long)flush_interval == 0L) {
                    this.queue_flush_check(queue, total_clip_counts, ps, sr.getAlignmentStart(), false);
                }
                boolean extract = false;
                int max_clen = 0;
                if (sr.getReadUnmappedFlag()) {
                    ++unmapped_count;
                } else if (sr.getDuplicateReadFlag()) {
                    ++duplicate_count;
                } else {
                    // ri = index into the read's bases/qualities of the current CIGAR element
                    int ri = 0;
                    for (CigarElement ce : sr.getCigar().getCigarElements()) {
                        CigarOperator co = ce.getOperator();
                        int clen = ce.getLength();
                        if (co == CigarOperator.SOFT_CLIP) {
                            boolean usable = true;
                            if (this.FILTER_LOW_QUALITY && this.is_low_quality_region(sr.getBaseQualities(), ri, ri + clen)) {
                                usable = false;
                                ++rejected_lq_clips;
                            }
                            if (usable) {
                                if (clen <= MAX_CLIP_LENGTH_TRACK) {
                                    clip_lengths[clen]++;
                                }
                                if (clen > max_clen) {
                                    max_clen = clen;
                                }
                                // sticky: one long-enough clip qualifies the read even if
                                // another clip on the same read is short
                                if (clen >= MINIMUM_CLIPPED_LENGTH) {
                                    extract = true;
                                }
                            }
                            ri += clen;
                        } else if (co == CigarOperator.MATCH_OR_MISMATCH || co == CigarOperator.EQ || co == CigarOperator.X || co == CigarOperator.INSERTION) {
                            // operators that consume read bases
                            ri += clen;
                        } else if (co == CigarOperator.SKIPPED_REGION || co == CigarOperator.HARD_CLIP || co == CigarOperator.DELETION || co == CigarOperator.PADDING) {
                            // operators that consume no read bases: nothing to advance
                            continue;
                        } else {
                            System.err.println("ERROR: unhandled CIGAR operator " + co);
                            System.exit(1);
                        }
                    }
                }
                // declared as ArrayList because it is stored into SoftClipTrack.clip_sites
                ArrayList<Integer> clip_sites = new ArrayList<Integer>();
                this.clip_track(clip_sites, sr.getAlignmentStart(), sr.getUnclippedStart());
                this.clip_track(clip_sites, sr.getAlignmentEnd(), sr.getUnclippedEnd());
                for (Integer site : clip_sites) {
                    Integer count = total_clip_counts.get(site);
                    total_clip_counts.put(site, count == null ? 1 : count + 1);
                }
                if (!extract) {
                    continue;
                }
                SoftClipTrack sct = new SoftClipTrack();
                sct.position = sr.getAlignmentEnd();
                sct.sr = sr;
                sct.max_clen = max_clen;
                sct.clip_sites = clip_sites;
                queue.add(sct);
                ++extracted_count;
            }
            this.queue_flush_check(queue, total_clip_counts, ps, 0, true);
            System.err.println("read count: " + read_count);
            System.err.println("unmapped count: " + unmapped_count);
            System.err.println("optical/pcr duplicate count: " + duplicate_count);
            System.err.println("rejected low-quality clipped regions: " + rejected_lq_clips);
            System.err.println("extracted count: " + extracted_count);
            for (int i = 0; i <= MAX_CLIP_LENGTH_TRACK; ++i) {
                if (clip_lengths[i] <= 0L) {
                    continue;
                }
                System.err.println("count for soft-clip length " + i + ": " + clip_lengths[i]);
            }
            wf.finish();
        } finally {
            // the reader holds the input file open; release it even when processing fails
            reader.close();
        }
    }

    /**
     * Command-line entry point. Usage: {@code -bam <file>}.
     *
     * @param argv command-line arguments
     */
    public static void main(String[] argv) {
        String bam_file = null;
        for (int i = 0; i < argv.length; ++i) {
            if (argv[i].equals("-bam")) {
                if (i + 1 >= argv.length) {
                    // "-bam" given as the last argument: there is no value to consume
                    System.err.println("ERROR: -bam requires a file argument");
                    System.exit(1);
                } else {
                    bam_file = argv[++i];
                }
                continue;
            }
            System.err.println("ERROR: unknown parameter " + argv[i]);
            System.exit(1);
        }
        if (bam_file == null) {
            System.err.println("ERROR: specify -bam [file]");
        } else {
            try {
                SAMExtractSoftClipped esc = new SAMExtractSoftClipped(new File(bam_file));
                esc.report();
            }
            catch (Exception e) {
                System.err.println("ERROR: " + e);
                e.printStackTrace();
            }
        }
    }

    /**
     * Decides whether the qualities in {@code [start, end)} average below
     * {@code MINIMUM_MEAN_CLIPPED_REGION_QUALITY} (integer mean).
     *
     * @param quals base qualities of the read; may be null
     * @param start index of the first base of the region
     * @param end   index one past the last base of the region
     * @return true if at least one quality was available and the mean is too low;
     *         false otherwise (including when no qualities are available)
     */
    private boolean is_low_quality_region(byte[] quals, int start, int end) {
        if (quals == null) {
            System.err.println("ERROR: null quality array");
            return false;
        }
        int qcount = 0;
        int total = 0;
        for (int i = start; i < end; ++i) {
            if (i >= quals.length) {
                // every later index is out of range too, so report once and stop
                System.err.println("ERROR: read index past end of qual array!");
                break;
            }
            ++qcount;
            total += quals[i];
        }
        return qcount > 0 && total / qcount < this.MINIMUM_MEAN_CLIPPED_REGION_QUALITY;
    }

    /**
     * Records {@code clipped} as a clip site when it differs from
     * {@code unclipped} by at least {@code MINIMUM_CLIPPED_TRACK_LENGTH}.
     *
     * @param clip_sites list the site is appended to
     * @param clipped    alignment boundary position
     * @param unclipped  corresponding unclipped boundary position
     */
    private void clip_track(ArrayList<Integer> clip_sites, int clipped, int unclipped) {
        if (clipped == unclipped) {
            return;
        }
        if (Math.abs(clipped - unclipped) >= MINIMUM_CLIPPED_TRACK_LENGTH) {
            clip_sites.add(clipped);
        }
    }

    /**
     * Writes queued reads that lie more than {@code FLUSH_BOUNDARY} positions
     * behind {@code current_position} (or all of them when {@code force} is set)
     * to the report, then discards clip-site counts that are no longer needed.
     *
     * @param queue             reads waiting to be written; flushed entries are removed
     * @param total_clip_counts clip site position -> number of reads clipped there
     * @param ps                report output stream
     * @param current_position  alignment start of the read currently being processed
     * @param force             flush everything regardless of position
     */
    private void queue_flush_check(HashSet<SoftClipTrack> queue, HashMap<Integer, Integer> total_clip_counts, PrintStream ps, int current_position, boolean force) {
        int threshold = current_position - FLUSH_BOUNDARY;
        int queue_size_before = queue.size();
        int site_count_before = total_clip_counts.size();
        // remove through the iterator: no dependence on SoftClipTrack.equals and
        // no quadratic removeAll against a list
        Iterator<SoftClipTrack> queue_it = queue.iterator();
        while (queue_it.hasNext()) {
            SoftClipTrack sct = queue_it.next();
            if (!force && threshold <= sct.position) {
                continue;
            }
            this.write_clip_record(sct, total_clip_counts, ps);
            queue_it.remove();
        }
        System.err.println("queue before: " + queue_size_before + "/" + site_count_before);
        if (force) {
            // the queue is empty now, so no count is needed any more
            total_clip_counts.clear();
        } else {
            // A read stays queued until its alignment END passes the threshold, but
            // its leading clip site sits at its alignment START. Keep the counts of
            // sites still referenced by queued reads so they are not reported as null.
            HashSet<Integer> pending_sites = new HashSet<Integer>();
            for (SoftClipTrack sct : queue) {
                pending_sites.addAll(sct.clip_sites);
            }
            Iterator<Integer> site_it = total_clip_counts.keySet().iterator();
            while (site_it.hasNext()) {
                Integer site = site_it.next();
                if (site < threshold && !pending_sites.contains(site)) {
                    site_it.remove();
                }
            }
        }
        System.err.println("       after: " + queue.size() + "/" + total_clip_counts.size());
    }

    /**
     * Writes one read as a two-line record: a header
     * {@code >readName key=value|...} and the read bases.
     *
     * @param sct               queued read to write
     * @param total_clip_counts clip site position -> count, used for the {@code clip_sites} feature
     * @param ps                report output stream
     */
    private void write_clip_record(SoftClipTrack sct, HashMap<Integer, Integer> total_clip_counts, PrintStream ps) {
        boolean trackable = true;
        if (sct.clip_sites.size() == 0) {
            System.err.println("odd read w/no leading/trailing softclip: " + sct.sr.getReadName() + " " + sct.sr.getReferenceName() + ":" + sct.sr.getAlignmentStart() + " CIGAR=" + SAMUtils.cigar_to_string(sct.sr.getCigar()));
            trackable = false;
        }
        HashMap<String, String> features = new HashMap<String, String>();
        features.put("pos", sct.sr.getReferenceName() + "." + sct.sr.getAlignmentStart());
        features.put("strand", sct.sr.getReadNegativeStrandFlag() ? "-" : "+");
        features.put("clip_len", Integer.toString(sct.max_clen));
        if (trackable) {
            ArrayList<String> mapped = new ArrayList<String>();
            for (Integer site : sct.clip_sites) {
                mapped.add(sct.sr.getReferenceName() + "." + site + ":" + total_clip_counts.get(site));
            }
            features.put("clip_sites", Str.join(",", mapped));
        }
        ArrayList<String> stuff = new ArrayList<String>();
        for (String key : features.keySet()) {
            stuff.add(key + "=" + features.get(key));
        }
        ps.println(">" + sct.sr.getReadName() + " " + Str.join("|", stuff));
        ps.println(new String(sct.sr.getReadBases()));
    }
}

