#! /usr/bin/env python

from __future__ import print_function
import os,sys
import argparse

import errno
from time import time, sleep
import itertools
import tempfile
import shutil

import math
import re
from collections import deque
from collections import defaultdict

import edlib

from modules import create_augmented_reference, help_functions, correct_seqs #,align

def eprint(*args, **kwargs):
    print(*args, file=sys.stderr, **kwargs)



def get_kmer_minimizers(seq, k_size, w_size):
    # kmers = [seq[i:i+k_size] for i in range(len(seq)-k_size) ]
    w = w_size - k_size
    window_kmers = deque([seq[i:i+k_size] for i in range(w +1)])
    curr_min = min(window_kmers)
    minimizers = [ (curr_min, list(window_kmers).index(curr_min)) ]

    for i in range(w+1,len(seq) - k_size):
        new_kmer = seq[i:i+k_size]
        # updateing window
        discarded_kmer = window_kmers.popleft()
        window_kmers.append(new_kmer)

        # we have discarded previous windows minimizer, look for new minimizer brute force
        if curr_min == discarded_kmer: 
            curr_min = min(window_kmers)
            minimizers.append( (curr_min, list(window_kmers).index(curr_min) + i - w ) )

        # Previous minimizer still in window, we only need to compare with the recently added kmer 
        elif new_kmer < curr_min:
            curr_min = new_kmer
            minimizers.append( (curr_min, i) )

    return minimizers

def get_kmer_maximizers(seq, k_size, w_size):
    # kmers = [seq[i:i+k_size] for i in range(len(seq)-k_size) ]
    w = w_size - k_size
    window_kmers = deque([seq[i:i+k_size] for i in range(w +1)])
    curr_min = max(window_kmers)
    minimizers = [ (curr_min, list(window_kmers).index(curr_min)) ]

    for i in range(w+1,len(seq) - k_size):
        new_kmer = seq[i:i+k_size]
        # updateing window
        discarded_kmer = window_kmers.popleft()
        window_kmers.append(new_kmer)

        # we have discarded previous windows minimizer, look for new minimizer brute force
        if curr_min == discarded_kmer: 
            curr_min = max(window_kmers)
            minimizers.append( (curr_min, list(window_kmers).index(curr_min) + i - w ) )

        # Previous minimizer still in window, we only need to compare with the recently added kmer 
        elif new_kmer > curr_min:
            curr_min = new_kmer
            minimizers.append( (curr_min, i) )

    return minimizers


def get_minimizers_and_positions_compressed(reads, w, k, hash_fcn):
    # 1. homopolymenr compress read and obtain minimizers
    M = {}
    for r_id in reads:
        (acc, seq, qual) = reads[r_id]

        seq_hpol_comp = ''.join(ch for ch, _ in itertools.groupby(seq))

        if hash_fcn == "lex":
            minimizers = get_kmer_minimizers(seq_hpol_comp, k, w)
        elif hash_fcn == "rev_lex":
            minimizers = get_kmer_maximizers(seq_hpol_comp, k, w)

        indices = [i for i, (n1,n2) in enumerate(zip(seq[:-1],seq[1:])) if n1 != n2] # indicies we want to take quality values from to get quality string of homopolymer compressed read 
        indices.append(len(seq) - 1)
        positions_in_non_compressed_sring = [(m, indices[p]) for m, p in minimizers ]
        M[r_id] = positions_in_non_compressed_sring

    return M


def get_minimizers_and_positions(reads, w, k, hash_fcn):
    # 1. homopolymenr compress read and obtain minimizers
    M = {}
    for r_id in reads:
        (acc, seq, qual) = reads[r_id]
        if hash_fcn == "lex":
            minimizers = get_kmer_minimizers(seq, k, w)
        elif hash_fcn == "rev_lex":
            minimizers = get_kmer_maximizers(seq, k, w)

        M[r_id] = minimizers

    return M



from array import array
def get_minimizer_combinations_database(reads, M, k, x_low, x_high):
    # M2 = defaultdict(lambda: defaultdict(list))
    M2 = defaultdict(lambda: defaultdict(lambda :array("I")))
    tmp_cnt = 0
    forbidden = 'A'*k
    for r_id in M:
        minimizers = M[r_id]
        for (m1,p1), m1_curr_spans in  minimizers_comb_iterator(minimizers, k, x_low, x_high):
            for (m2, p2) in m1_curr_spans:
                if m2 == m1 == forbidden:
                    continue

                tmp_cnt +=1
                # t = array('I', [r_id, p1, p2])
                # M2[m1][m2].append( t )
                # M2[m1][m2].append((r_id, p1, p2))

                M2[m1][m2].append(r_id)
                M2[m1][m2].append(p1)
                M2[m1][m2].append(p2)

    print(tmp_cnt, "MINIMIZER COMBINATIONS GENERATED")
    # import time
    # time.sleep(10)
    # sys.exit()

    avg_bundance = 0
    singleton_minimzer = 0
    cnt = 1
    abundants=[]
    for m1 in list(M2.keys()):
        for m2 in list(M2[m1].keys()):
            if len(M2[m1][m2]) > 3:
                avg_bundance += len(M2[m1][m2])//3
                cnt +=1
            else:
                del M2[m1][m2]
                singleton_minimzer += 1

            if len(M2[m1][m2])// 3 > len(reads):
                abundants.append((m1,m2, len(M2[m1][m2])//3 ))
                if m2 == forbidden: # poly A tail
                    del M2[m1][m2]
    for m1,m2,ab in sorted(abundants, key=lambda x: x[2], reverse=True):
        print("Too abundant:", m1, m2, ab, len(reads))

    print("Average abundance for non-unique minimizer-combs:", avg_bundance/float(cnt))
    print("Number of singleton minimizer combinations filtered out:", singleton_minimzer)

    return M2



def minimizers_comb_iterator(minimizers, k, x_low, x_high):
    # print("read")
    for i, (m1, p1) in enumerate(minimizers[:-1]):
        m1_curr_spans = []
        for j, (m2, p2) in enumerate(minimizers[i+1:]):
            if x_low < p2 - p1 and p2 - p1 <= x_high:
                m1_curr_spans.append( (m2, p2) )
                # yield (m1,p1), (m2, p2) 
            elif p2 - p1 > x_high:
                break
        yield (m1, p1), m1_curr_spans[::-1]


def edlib_alignment(x, y, k):
    # if i == 100 and j % 1000 == 0:
    #     print("Edlib processed alignments: {0}".format(j+1))

    result = edlib.align(x,y, "NW", 'dist', k) # , task="path")
    ed = result["editDistance"]
    # locations = result["locations"]
    return ed #, locations


def get_context_offset(vector, k):
    nuc_obs = 0
    if not vector:
        return 0
    for offset, n in enumerate(vector):
        if n != '-':
            nuc_obs += 1

        if nuc_obs >= k:
            break
    return offset

# from modules import kmer_analysis

def get_contexts(alignment_matrix, k_size):
    ref_aln = alignment_matrix["ref"]
    contexts_per_pos = []
    for i_tmp in range(len(ref_aln)):
        nuc_obs = 0
        # j_tmp = i_tmp
        p1 = get_context_offset(ref_aln[:i_tmp][::-1], k_size)
        p2 = get_context_offset(ref_aln[i_tmp + 1:], k_size) + 1
        # print(p1,p2)
        # print(ref_aln)
        contexts_per_pos.append((i_tmp - p1, i_tmp + p2))
    return contexts_per_pos

def is_substring(seq, set_of_seqs):
    for s in set_of_seqs:
        if seq in s or s in seq:
            return True
    return False

import numpy as np
# import numba
# from numba import jit
# @jit(nopython=True)
def test_numba(A, contexts_per_pos, n, context_threshold):
    FCM = [] # frequency context matrix

    prev_context_start, prev_context_stop = -1,-1
    prev_context = []

    for i in range(n):
        eligible_contexts = []
        context_start, context_stop = contexts_per_pos[i][0], contexts_per_pos[i][1]
        variant_pos = i - context_start
        if context_start == prev_context_start and context_stop == prev_context_stop:
            # use previous context and depths but shift variant position one forward
            for v, cn, dep in FCM[i-1]:
                # print(v, cn, dep, variant_pos, i, context_start)
                # print(len(cn), variant_pos, i, "window",context_start, context_stop, n)
                # eligible_contexts.append((cn[min(variant_pos, len(cn)-1)] ,cn, dep))
                eligible_contexts.append((cn[variant_pos] ,cn, dep))

            FCM.append( eligible_contexts )
            continue
        else:
            prev_context_start, prev_context_stop = context_start, context_stop

        context = A[ :, context_start :context_stop] 
        b = np.ascontiguousarray(context).view(np.dtype((np.void, context.dtype.itemsize * context.shape[1])))
        unq_a, unq_cnt = np.unique(b, return_counts=True)
        unq_a = unq_a.view(context.dtype).reshape(-1, context.shape[1])
        # print( unq_a)
        # print('OK', unq_cnt)         
        # print('Unique Values along with occurrence Count')
        # Iterate over the zip object
        for (j, count) in sorted(enumerate(unq_cnt), key = lambda x: x[1], reverse=True):
            # print(count)
            if count > max(context_threshold/10, 5):
                # print("im here")
                context = tuple(unq_a[j].tolist())
                # try:
                # print('lol', context, variant_pos, count)
                # variant = context[min(variant_pos, len(context)-1)] #context[variant_pos]
                variant = context[variant_pos] #context[variant_pos]
                eligible_contexts.append( (variant, context, count ) )
                # FCM.[i][ (variant, context ) ] = count
                # except:
                #     pass
            else: 
                # print('Breaking', count)
                break
        FCM.append(eligible_contexts)
    return FCM


def sep_function_test(alignment_matrix, FCM, contexts_per_pos):
    for acc, aln_tuple in alignment_matrix.items():
        # aln_tuple = tuple(aln_list)
        if acc == "ref":
            continue
        subtuples = [ (j, aln_tuple[contexts_per_pos[j][0] :contexts_per_pos[j][1]]) for j in range(1, len(aln_tuple)) if contexts_per_pos[j][0] != contexts_per_pos[j-1][0] or contexts_per_pos[j][1] != contexts_per_pos[j-1][1] ]
        for j in range(len(aln_tuple)):
            FCM[j][ (aln_tuple[j], subtuples[j]) ] += 1

    #     prev_context_start, prev_context_stop = -1,-1
    #     prev_context = tuple()
    #     for j in range(len(aln_tuple)):
    #         context_start, context_stop = contexts_per_pos[j][0], contexts_per_pos[j][1]
    #         if context_start == prev_context_start and context_stop == prev_context_stop:
    #             context = prev_context
    #         else:
    #             context =  aln_tuple[context_start :context_stop] #prev_context
    #             # context =  aln_tuple[context_start :context_stop] #prev_context
    #             # context = "".join([s for s in aln_tuple[context_start :context_stop]]) #tuple( aln_tuple[context_start :context_stop] )
            
    #         variant = aln_tuple[j]
    #         FCM[j][ (variant, context) ] += 1
    #         prev_context_start, prev_context_stop = context_start, context_stop
    #         prev_context = context
    # # return FCM

def sep_function(alignment_matrix, FCM, contexts_per_pos):
    for acc, aln_list in alignment_matrix.items():
        aln_tuple = tuple(aln_list)
        # if acc == "ref":
        #     continue
        prev_context_start, prev_context_stop = -1,-1
        prev_context = []
        for j in range(len(aln_tuple)):
            context_start, context_stop = contexts_per_pos[j][0], contexts_per_pos[j][1]
            if context_start == prev_context_start and context_stop == prev_context_stop:
                context = prev_context
            else:
                context = tuple( aln_tuple[context_start :context_stop] )
            
            variant = aln_tuple[j]
            FCM[j][ (variant, context) ] += 1
            prev_context_start, prev_context_stop = context_start, context_stop
            prev_context = context
    # return FCM

# def sep_function_only_str(partition):
#     FCM = [defaultdict(int) for j in range(nr_columns)] # frequency context matrix

#     for (q_id, pos1, pos2) in partition = 
#     (res["editDistance"], ref_alignment, read_alignment, 1)
#     return FCM

def get_alternative_ref_contexts(alignment_matrix, contexts_per_pos, context_threshold, disable_numpy):

    ref_aln = alignment_matrix["ref"]
    nr_columns = len(ref_aln)
    if disable_numpy:
        FCM = [defaultdict(int) for j in range(nr_columns)] # frequency context matrix
        sep_function(alignment_matrix, FCM, contexts_per_pos)
    else:
        A = np.array([l for l in alignment_matrix.values()])
        n = len(A[0])
        FCM = test_numba(A, contexts_per_pos, n, context_threshold)
    # print(len(FCM), len(FCM3))
    # assert len(FCM) == len(FCM3)

    alternative_contexts = [set() for j in range(nr_columns)]
    for j in range(len(FCM)):
        # eligible_contexts2 = [ (variant, tuple([c for c in cn]) , dep) for (variant,cn), dep in FCM[j].items() if dep > max(context_threshold/10, 5)] # remove very low abundant noise before calculation
        eligible_contexts = FCM[j]
        # if len(eligible_contexts2) != len(eligible_contexts):
        #     print(eligible_contexts)
        #     print(eligible_contexts2)
        #     print(max(context_threshold/10, 5))
        #     print( ref_aln[j-9:j+9])
        # assert len(eligible_contexts2) == len(eligible_contexts)
        # if set( [(v,cn) for v,cn,d in eligible_contexts]) !=  set( [(v,cn) for v,cn,d in eligible_contexts2]):
        #     print("Different" )
        #     print(j,eligible_contexts)
        #     print(j,eligible_contexts2)
        #     print(len(FCM), ref_aln[j], ref_aln)
        #     print()
        # else:
        #     pass
        #     # print('Same')

        # print(eligible_contexts)
        # print(FCM3[j])
        # print()
        # filter FCM to only positions with more than one variants
        if len(eligible_contexts) == 0 or len( {v for (v, ref_tmp, dep) in eligible_contexts}) == 1:
            # print(j)
            pass
        # elif len(eligible_contexts) == 1:
        #     # print(eligible_contexts)
        #     # print(eligible_contexts[0])
        #     alternative_contexts[j].add(eligible_contexts[0])
        else:
            eligible_contexts_hcomp = {}
            for variant, aln_seq, depth in sorted(eligible_contexts, key = lambda x: x[2], reverse=True): # highest depth first
                seq = "".join([c for c in aln_seq if c != '-'])
                seq_hpol_comp = ''.join(ch for ch, _ in itertools.groupby(seq))
                if seq_hpol_comp in eligible_contexts_hcomp:
                    continue
                else:
                    min_ed = 1000
                    for prev_h_comp in eligible_contexts_hcomp:
                        # seq_tmp = eligible_contexts_hcomp[prev_h_comp][4]
                        ed = edlib_alignment( prev_h_comp, seq_hpol_comp, 1000)
                        if ed < min_ed:
                            min_ed = ed
                    threshold = context_threshold/max(1.0, min_ed)
                    if depth >= threshold:
                        eligible_contexts_hcomp[seq_hpol_comp] = (variant, depth, aln_seq, threshold, seq)

                # # print(variant,seq, seq_hpol_comp, depth)
                # if seq_hpol_comp not in eligible_contexts_hcomp:
                #     # if not is_substring(seq_hpol_comp,eligible_contexts_hcomp):
                #     eligible_contexts_hcomp[seq_hpol_comp] = (variant, depth, aln_seq)
                # elif depth > eligible_contexts_hcomp[seq_hpol_comp][1]:
                #     eligible_contexts_hcomp[seq_hpol_comp] = (variant, depth, aln_seq)
            # print(j, eligible_contexts_hcomp)

            if len(eligible_contexts_hcomp) > 1:
                for seq_tmp_hcomp, (variant, depth, aln_seq, threshold, seq) in eligible_contexts_hcomp.items():
                    alternative_contexts[j].add( (variant, aln_seq, depth, threshold) )

            # if len( {v for (v, ref_tmp, dep, thresh_) in alternative_contexts[j]}) > 1:
            #     print("MAAAAAAAAAADE IT", [(variant,depth,threshold, "".join([c for c in aln_seq if c != '-']))  for (variant, aln_seq, depth, threshold)  in alternative_contexts[j]] )
        # BUUUG no alternative refs
    # print([ ("".join([c for c in cn if c != '-']) , dep) for cn, dep in FCM[50].items() if ])
    # sys.exit()
    # for i_tmp in range(len(ref_aln)):
    #     depths_per_pos[i_tmp] = defaultdict(int)
    #     for 
    return alternative_contexts

def get_best_corrections(curr_best_seqs, reads, k_size, work_dir,  v_depth_ratio_threshold = 0.1, max_seqs_to_spoa = 200, disable_numpy=False):
    """
        curr_best_seqs is an array with q_id, pos1, pos2
        the current read is on index 0 in curr_best_seqs array
    """
    weight = len(curr_best_seqs)//3

    # print()
    # print()
    # print(weight)
    # print()
    # print()

    reads_path = open(os.path.join(work_dir, "reads_tmp.fa"), "w")
    curr_read_id = curr_best_seqs[0]
    # for i, (q_id, (seq, pos1, pos2)) in enumerate(curr_best_seqs.items()):
    for i, (q_id, pos1, pos2) in  enumerate(grouper(curr_best_seqs, 3)):
        seq = reads[q_id][1][pos1: pos2 + k_size]
        if i > max_seqs_to_spoa:
            break
        reads_path.write(">{0}\n{1}\n".format(str(q_id)+str(pos1)+str(pos2), seq))
    reads_path.close()
    spoa_ref = create_augmented_reference.run_spoa(reads_path.name, os.path.join(work_dir,"spoa_tmp.fa"), "spoa")
    

    ######### Adjust spoa sequence ###############
    #################################################
    #################################################

    start_anchor = reads[curr_read_id][1][:k_size] # curr_best_seqs["curr_read"][0][:k_size]
    stop_anchor = reads[curr_read_id][1][-k_size:]  # curr_best_seqs["curr_read"][0][-k_size:]
    start_index = spoa_ref.find(start_anchor) 
    stop_index = len(spoa_ref) - spoa_ref[::-1].find(stop_anchor[::-1]) 
    # print(start_index, stop_index)
    if start_index >=0 and stop_index <= len(spoa_ref):
        spoa_ref_mod = spoa_ref[start_index: stop_index]
        # if spoa_ref != spoa_ref_mod:
        #     print(spoa_ref)
        #     print(spoa_ref_mod)
        # sys.exit()
        spoa_ref = spoa_ref_mod
    
    
    partition = {"ref" : (0, spoa_ref, spoa_ref, 1)}
    for q_id, pos1, pos2 in  grouper(curr_best_seqs, 3):
        seq = reads[q_id][1][pos1: pos2 + k_size]
    # for q_id, (seq, pos1, pos2) in curr_best_seqs.items():
        res = edlib.align(seq, spoa_ref, task="path", mode="NW")
        cigar_string = res["cigar"]
        read_alignment, ref_alignment = help_functions.cigar_to_seq(cigar_string, seq, spoa_ref)
        partition[(q_id, pos1, pos2)] = (res["editDistance"], ref_alignment, read_alignment, 1)

    alignment_matrix = correct_seqs.create_multialignment_matrix(partition)
    nr_columns = len(alignment_matrix["ref"])
    PFM = [{"A": 0, "C": 0, "G": 0, "T": 0, "U" : 0, "-": 0, "N": 0} for j in range(nr_columns)]
    for r_tmp, aln_list in alignment_matrix.items():
        if r_tmp == "ref":
            continue
        for j, n in enumerate(aln_list):
            PFM[j][n] += 1

    other_corrections = defaultdict(list)

    # TODO: potentially add context around substitutions here
    variant_threshold = max(5, len(curr_best_seqs) * v_depth_ratio_threshold)
    context_threshold = max(5, len(curr_best_seqs) * v_depth_ratio_threshold) # max(5, len(curr_best_seqs) * context_depth_ratio_threshold)
    # print(variant_threshold, context_threshold, len(curr_best_seqs), spoa_ref)

    contexts_per_pos = get_contexts(alignment_matrix, k_size)
    alternative_refs = get_alternative_ref_contexts(alignment_matrix, contexts_per_pos, context_threshold, disable_numpy)
    ref_aln = alignment_matrix["ref"]

    for i, d in enumerate(PFM):
        # calculate reference context here the first thing we do from ref_aln
        if len(alternative_refs[i]) > 1 and len( {v for (v, ref_tmp, dep, thresh_) in alternative_refs[i]}) > 1:
            for q_id_tuple in alignment_matrix:    
                if q_id_tuple == "ref":
                    continue

                read_aln = alignment_matrix[q_id_tuple]
                read_nucl = read_aln[i]
                ref_nucl = alignment_matrix["ref"][i]

                if read_nucl == ref_nucl:
                    other_corrections[q_id_tuple].append(ref_nucl)
                    # print(read_nucl, ref_nucl, [ (v, "".join([c for c in ref_tmp if c != '-']), dep) for (v, ref_tmp,dep) in  alternative_refs[i]])
                    continue

                
                read_context =  tuple(read_aln[contexts_per_pos[i][0] : contexts_per_pos[i][1]])
                
                min_ed = 1000
                ref_to_correct_to = read_nucl
                corr_d = 0
                for (variant, ref_context, depth, thresh_) in alternative_refs[i]:
                    if read_context == ref_context:
                        min_ed = 0
                        ref_to_correct_to = variant
                        corr_d = depth
                        # other_corrections[q_id_tuple].append(read_nucl)
                        # print('Corr (identical) to', (variant, "".join([c for c in ref_context if c != '-']), depth), [ (v, "".join([c for c in ref_tmp if c != '-']), dep) for (v, ref_tmp,dep) in  alternative_refs[i]])
                        break

                    ed = edlib_alignment( "".join([c for c in ref_context if c != '-']), "".join([c for c in read_context if c != '-']), 1000)
                    if ed < min_ed:
                        min_ed = ed
                        ref_to_correct_to = variant
                        corr_d = depth
                
                other_corrections[q_id_tuple].append(ref_to_correct_to)
                # print()
                # print( [ (v, "".join([c for c in ref_tmp if c != '-']), dep) for (v, ref_tmp,dep) in  alternative_refs[i]] )
                # print('Correcting to:',ref_to_correct_to, min_ed, corr_d, "".join([c for c in read_context if c != '-']))
                # print()
        else:

            ref_nucl = alignment_matrix["ref"][i]
            if ref_nucl != "-":
                for q_id_tuple in alignment_matrix:
                    if q_id_tuple == "ref":
                        continue
                    read_aln = alignment_matrix[q_id_tuple]
                    read_nucl = read_aln[i]
                    if read_nucl != "-" and read_nucl != ref_nucl:
                        if variant_threshold <= d[read_nucl]:
                            if ref_aln[contexts_per_pos[i][0]: i] == read_aln[contexts_per_pos[i][0]: i] and ref_aln[i+1: contexts_per_pos[i][1]] == read_aln[i+1: contexts_per_pos[i][1]]: # potential variant position
                                other_corrections[q_id_tuple].append(read_nucl)
                            else:
                                other_corrections[q_id_tuple].append(ref_nucl)

                        else:
                            other_corrections[q_id_tuple].append(ref_nucl)
                    else:
                        other_corrections[q_id_tuple].append(ref_nucl)



    other_corrections_final = defaultdict(list)
    for q_id_tuple in other_corrections:
        other_read_corr = other_corrections[q_id_tuple]
        other_read_corr = "".join([n for n in other_read_corr if n != "-"]) #spoa_ref #
        # print(q_id_tuple)
        q_id, q_p1, q_p2 = q_id_tuple
        # print()
        other_corrections_final[q_id].append( (q_p1 + k_size, q_p2, weight, other_read_corr[k_size:-k_size] ))

        if q_id == curr_read_id:
            curr_read_corr = "".join([n for n in other_read_corr if n != "-"]) #spoa_ref #

    return curr_read_corr[k_size:-k_size], other_corrections_final




def fill_p2(p, all_intervals_sorted_by_finish):

    stop_to_max_j = {stop : j for j, (start, stop, w, _) in enumerate(all_intervals_sorted_by_finish)}
    all_choord_to_max_j = []
    j_max = 0
    for i in range(0, all_intervals_sorted_by_finish[-1][1]):
        if i in stop_to_max_j:
            j_max = stop_to_max_j[i]
        
        all_choord_to_max_j.append(j_max)

    for j, (start, stop, w, _) in enumerate(all_intervals_sorted_by_finish):
        j_max = all_choord_to_max_j[start]
        p.append(j_max)
    return p

def solve_WIS(all_intervals_sorted_by_finish):
    # print("instance size", len(all_intervals_sorted_by_finish))
    # p = [None]
    # fill_p(p, all_intervals_sorted_by_finish)
    p = [None]    
    fill_p2(p, all_intervals_sorted_by_finish)
    # if p != p2:
    #     print(p)
    #     print(p2)
    # assert p == p2

    v = [None] + [w*(stop-start) for (start, stop, w, _) in all_intervals_sorted_by_finish]
    OPT = [0]
    for j in range(1, len(all_intervals_sorted_by_finish) +1):
        OPT.append( max(v[j] + OPT[ p[j] ], OPT[j-1] ) )

    # assert len(p) == len(all_intervals_sorted_by_finish) + 1 == len(v) == len(OPT)

    # Find solution
    opt_indicies = []
    j = len(all_intervals_sorted_by_finish)
    while j >= 0:
        if j == 0:
            break
        if v[j] + OPT[p[j]] > OPT[j-1]:
            opt_indicies.append(j - 1) # we have shifted all indices forward by one so we neew to reduce to j -1 because of indexing in python works
            j = p[j]
        else:
            j -= 1
    return opt_indicies


from itertools import zip_longest
def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)

def add_items(seqs, r_id, p1, p2):
    seqs.append(r_id)
    seqs.append(p1)
    seqs.append(p2)

def find_most_supported_span(r_id, m1, p1, m1_curr_spans, minimizer_combinations_database, reads, all_intervals, k_size, tmp_cnt, read_complexity_cnt, quality_values_database, already_computed):

    acc, seq, qual = reads[r_id]
    for (m2,p2) in m1_curr_spans:
        # print(p1,p2)
        relevant_reads = minimizer_combinations_database[m1][m2]
        seqs = array("I") #{} #defaultdict(list)
        added_strings = {} 
        locations = {}
        # not_added_strings = set() 
        if len(relevant_reads)//3 >= 3: 
            # cnt += 1
            ref_seq = seq[p1  : p2 + k_size]
            # ref_qual = qual[p1 : p2 + k_size]            
            p_error_ref = (quality_values_database[r_id][p2 + k_size] - quality_values_database[r_id][p1])/(p2 + k_size - p1)

            # seqs["curr_read"] = (p1, p2)
            # add_items(seqs, "curr_read", p1, p2)
            add_items(seqs, r_id, p1, p2) 
            locations[0] = len(seqs) - 3
            added_strings[ref_seq] = 0
            reads_visited = {}
            for relevant_read_id, pos1, pos2 in grouper(relevant_reads, 3): #relevant_reads:
                if r_id  == relevant_read_id:
                    continue
                
                read_seq = reads[relevant_read_id][1][pos1: pos2 + k_size]
                #read_qual = reads[relevant_read_id][2][pos1: pos2 + k_size]

                if read_seq == ref_seq:
                    # seqs[relevant_read_id] = (pos1, pos2)
                    add_items(seqs, relevant_read_id, pos1, pos2)
                    locations[relevant_read_id] = len(seqs) - 3
                    reads_visited[relevant_read_id] = 0
                    already_computed[relevant_read_id] = (p1,p2,pos1,pos2, 0)
                    continue
                elif relevant_read_id in reads_visited:
                    # print("Prev:", reads_visited[relevant_read_id])
                    # print("Act:", edlib_alignment(ref_seq, read_seq, p_error_sum_thresh*len(ref_seq)) )
                    pass
        # Implement if we see this to recompute all the aligments exact ed here instead!! Thats the only way to guarantee exactly the same
        # or maybe use this traceback to get exact: https://github.com/Martinsos/edlib/pull/132#issuecomment-522258271
                elif read_seq in added_strings:  #== ref_seq:
                    # seqs[relevant_read_id] = (pos1, pos2)
                    add_items(seqs, relevant_read_id, pos1, pos2)
                    locations[relevant_read_id] = len(seqs) - 3
                    reads_visited[relevant_read_id] = added_strings[read_seq]
                    already_computed[relevant_read_id] = (p1,p2,pos1,pos2, added_strings[read_seq])
                    continue

                elif relevant_read_id in already_computed:
                    curr_ref_start, curr_ref_end, curr_read_start, curr_read_end, curr_ed = already_computed[relevant_read_id]
                    if (curr_read_start <= pos1 and pos2 <= curr_read_end) and (curr_ref_start <= p1 and p2 <=  curr_ref_end):
                        p_error_read = (quality_values_database[relevant_read_id][pos2 + k_size] - quality_values_database[relevant_read_id][pos1])/(pos2 + k_size - pos1)
                        p_error_sum_thresh = p_error_ref + p_error_read # curr_p_error_sum_thresh*len(ref_seq)
                        read_beg_diff = pos1 - curr_read_start
                        read_end_diff = pos2 - curr_read_end
                        ref_beg_diff = p1 - curr_ref_start
                        ref_end_diff = p2 - curr_ref_end

                        ed_est = curr_ed + math.fabs(ref_end_diff - read_end_diff) + math.fabs(read_beg_diff - ref_beg_diff) 
                        if 0 <= ed_est <= p_error_sum_thresh*len(ref_seq): # < curr_p_error_sum_thresh*len(ref_seq):
                            # seqs[relevant_read_id] = (pos1, pos2)
                            add_items(seqs, relevant_read_id, pos1, pos2)
                            locations[relevant_read_id] = len(seqs) - 3
                            added_strings[read_seq] = ed_est
                            reads_visited[relevant_read_id] = ed_est

                            continue

                    else:
                        pass
                


                p_error_read = (quality_values_database[relevant_read_id][pos2 + k_size] - quality_values_database[relevant_read_id][pos1])/(pos2 + k_size - pos1)
                p_error_sum_thresh = p_error_ref + p_error_read #sum([D[char_] for char_ in read_qual])/len(read_qual) #+ 0.1
                editdist = edlib_alignment(ref_seq, read_seq, p_error_sum_thresh*len(ref_seq))

                tmp_cnt += 1
                if editdist >= 0:    # passing second edit distance check
                    if relevant_read_id in reads_visited: # we have already seen the minimizer combination
                        # prev_pos1, prev_pos2 = seqs[relevant_read_id]
                        prev_pos1, prev_pos2 = seqs[ locations[relevant_read_id] + 1], seqs[ locations[relevant_read_id] + 2]
                        prev_read_seq = reads[relevant_read_id][1][prev_pos1: prev_pos2 + k_size]
                        editdist_prev = edlib_alignment(ref_seq, prev_read_seq, len(ref_seq))
                        tmp_cnt += 1
                        read_complexity_cnt += 1

                        if editdist < editdist_prev:
                            # seqs[relevant_read_id] = (pos1, pos2)
                            seqs[locations[relevant_read_id] + 1] = pos1
                            seqs[locations[relevant_read_id] + 2] = pos2
                            added_strings[read_seq] = editdist
                            reads_visited[relevant_read_id] = editdist
                            already_computed[relevant_read_id] = (p1, p2, pos1, pos2, editdist)
                            # print("REPLACED OLD MATCH")
                        # else:
                        #     # seqs[relevant_read_id] = (prev_pos1, prev_pos2)
                        #     added_strings[prev_read_seq] = editdist_prev
                        #     reads_visited[relevant_read_id] = editdist_prev
                        #     already_computed[relevant_read_id] = (p1,p2,prev_pos1, prev_pos2, editdist_prev)
                    else:
                        # seqs[relevant_read_id] = (pos1, pos2)
                        add_items(seqs, relevant_read_id, pos1, pos2)
                        locations[relevant_read_id] = len(seqs) - 3
                        added_strings[read_seq] = editdist
                        reads_visited[relevant_read_id] = editdist
                        already_computed[relevant_read_id] = (p1,p2,pos1,pos2, editdist)


            all_intervals.append( (p1 + k_size, p2,  len(seqs)//3, seqs) )
    del seqs
    return tmp_cnt, read_complexity_cnt

def get_intervals_to_correct(opt_indicies, all_intervals_sorted_by_finish):
    intervals_to_correct =[]
    for j in opt_indicies:
        start, stop, weights, instance = all_intervals_sorted_by_finish[j]
        intervals_to_correct.append( (start, stop, weights, instance))

    return intervals_to_correct

def correct_read(seq, reads, intervals_to_correct, k_size, work_dir, v_depth_ratio_threshold,  max_seqs_to_spoa, disable_numpy, verbose):
    corr_seq = []
    # print(opt_indicies)
    other_reads_corrected_regions = defaultdict(list)
    prev_stop = 0
    for start, stop, weights, instance in intervals_to_correct:
    # for j in opt_indicies:
    #     start, stop, weights, instance = all_intervals_sorted_by_finish[j]
        if start - k_size > prev_stop and prev_stop > 0:
            # print()
            if verbose:
                eprint("Gap in correction:", start-k_size - prev_stop, "between positions:", prev_stop, start, )
            # print()
            # sys.exit()
        prev_stop = stop + k_size

        if isinstance(instance, str): # already corrected
            best_corr = instance
        else:
            best_corr, other_corrections = get_best_corrections(instance, reads, k_size, work_dir, v_depth_ratio_threshold, max_seqs_to_spoa, disable_numpy) # store all corrected regions within all reads in large container and keep track when correcting new read to not re-compute these regions     
            for other_r_id, other_corr_regions in other_corrections.items():
                for region in other_corr_regions:
                    other_reads_corrected_regions[other_r_id].append(region)
        # print(seq[start: stop],  best_corr)
        corr_seq.append((start,stop, best_corr))

    tmp = [seq[0 : corr_seq[0][0]] ]
    for cnt, (start_, stop_, seq_segment) in enumerate(corr_seq):
        tmp.append(seq_segment)
        # print(tmp)
        # if math.fabs( (stop_ - start_) - len(seq_segment)) > 20:
        #     print("Structural correction", stop_ - start_, len(seq_segment))
        #     print(seq_segment)
        #     print(seq[start_: stop_])

        if cnt == len(corr_seq) - 1:
            tmp.append( seq[ stop_ : ] )
        else:
            # print(cnt)
            tmp.append( seq[ stop_ : corr_seq[cnt+1][0]] )
                    
    corr = "".join([s for s in tmp])
    return corr, other_reads_corrected_regions


D = {chr(i) : min( 10**( - (ord(chr(i)) - 33)/10.0 ), 0.79433)  for i in range(128)}

def get_qvs(reads):
    quality_values_database = {}
    for r_id in reads:
        (acc, seq, qual) = reads[r_id]
        quality_values_database[r_id] = [0]
        tmp_tot_sum = 0
        for char_ in qual:
            qv = D[char_]
            quality_values_database[r_id].append( tmp_tot_sum + qv )  #= [D[char_] for char_ in qual]
            tmp_tot_sum += qv
    return quality_values_database



def batch(dictionary, size):
    batches = []
    sub_dict = {}
    for i, (acc, seq) in enumerate(dictionary.items()):
        if i > 0 and i % size == 0:
            batches.append(sub_dict)
            sub_dict = {}
            sub_dict[acc] = seq
        else:
            sub_dict[acc] = seq

    if i/size != 0:
        sub_dict[acc] = seq
        batches.append(sub_dict)
    elif len(dictionary) == 1:
        batches.append(sub_dict)        
    
    return batches

def main(args):
    # start = time()
    all_reads = { i + 1 : (acc, seq, qual) for i, (acc, (seq, qual)) in enumerate(help_functions.readfq(open(args.fastq, 'r')))}
    eprint("Total cluster of {0} reads.".format(len(all_reads)))
    max_seqs_to_spoa = args.max_seqs_to_spoa
    if len(all_reads) <= args.exact_instance_limit:
        args.exact = True
    if args.set_w_dynamically:
        args.w = args.k + min(7, int( len(all_reads)/500))

    eprint("ARGUMENT SETTINGS:")
    for key, value in args.__dict__.items():
        eprint("{0}: {1}".format(key, value))
        # setattr(self, key, value)
    eprint()

    work_dir = tempfile.mkdtemp()
    print("Temporary workdirektory:", work_dir)

    # start = time()
    corrected_reads = {}
    v_depth_ratio_threshold = args.T
    # context_depth_ratio_threshold = args.C
    k_size = args.k
    for batch_id, reads in enumerate(batch(all_reads, args.max_seqs)):
        print("correcting {0} reads in a batch".format(len(reads)))
        batch_start_time = time()

        w = args.w
        x_high = args.xmax
        x_low = args.xmin
        hash_fcn = "lex"
        # for hash_fcn in ["lex"]: # ["lex"]: #  add "rev_lex" # add four others
        if args.compression:
            minimizer_database  = get_minimizers_and_positions_compressed(reads, w, k_size, hash_fcn)
        else:
            minimizer_database  = get_minimizers_and_positions(reads, w, k_size, hash_fcn)

        minimizer_combinations_database = get_minimizer_combinations_database(reads, minimizer_database, k_size, x_low, x_high)
        quality_values_database = get_qvs(reads)
        # print(minimizer_database)
        if args.verbose:
            eprint("done creating minimizer combinations")

        # print( [ (xx, len(reads_to_M2[xx])) for xx in reads_to_M2 ])
        # sys.exit()
        # corrected_reads = {}
        # tot_errors_before = {"subs" : 0, "del": 0, "ins": 0}
        # tot_errors_after = {"subs" : 0, "del": 0, "ins": 0}
        tot_corr = 0
        previously_corrected_regions = defaultdict(list)
        # stored_calculated_regions = defaultdict(lambda: defaultdict(int))
        tmp_cnt = 0

        for r_id in sorted(reads): #, reverse=True):
            read_min_comb = [ ((m1,p1), m1_curr_spans) for (m1,p1), m1_curr_spans in  minimizers_comb_iterator(minimizer_database[r_id], k_size, x_low, x_high)]
            # print(read_min_comb)
            # sys.exit()
            if args.exact:
                previously_corrected_regions = defaultdict(list)
            # stored_calculated_regions = defaultdict(list)
    
            #  = stored_calculated_regions[r_id]
            corr_pos = []
            (acc, seq, qual) = reads[r_id]
            # print("starting correcting:", seq)

            # print(r_id, sorted(previously_corrected_regions[r_id], key=lambda x:x[1]))
            read_previously_considered_positions = set([tmp_pos for tmp_p1, tmp_p2, w_tmp, _ in previously_corrected_regions[r_id] for tmp_pos in range(tmp_p1, tmp_p2)])
            
            if args.verbose:
                if read_previously_considered_positions:
                    eprint("not corrected:", [ (p1_, p2_) for p1_, p2_ in zip(sorted(read_previously_considered_positions)[:-1], sorted(read_previously_considered_positions)[1:]) if p2_ > p1_ + 1 ] )
                else:
                    eprint("not corrected: entire read", )

            if previously_corrected_regions[r_id]:
                read_previously_considered_positions = set([tmp_pos for tmp_p1, tmp_p2, w_tmp, _ in previously_corrected_regions[r_id] for tmp_pos in range(tmp_p1, tmp_p2)])
                group_id = 0
                pos_group = {}
                sorted_corr_pos = sorted(read_previously_considered_positions)
                for p1, p2 in zip(sorted_corr_pos[:-1], sorted_corr_pos[1:]):
                    if p2 > p1 + 1:
                       pos_group[p1] = group_id 
                       group_id += 1
                       pos_group[p2] = group_id 
                    else:
                       pos_group[p1] = group_id 
                if p2 == p1 + 1:
                    pos_group[p2] = group_id 
            else:
                read_previously_considered_positions= set()
                pos_group = {}

            # if r_id in interval_database:
            #     print(len(interval_database[r_id]))
            already_computed = {}
            read_complexity_cnt = 0
            # test_cnt = 0
            # old_cnt = 0
            # test_cnt2 = 0
            all_intervals = []
            # prev_visited_intervals = []


            for (m1,p1), m1_curr_spans in read_min_comb: 
                # If any position is not in range of current corrections: then correct, not just start and stop
                not_prev_corrected_spans = [(m2,p2) for (m2,p2) in m1_curr_spans if not (p1 + k_size in read_previously_considered_positions and p2 - 1 in read_previously_considered_positions) ] 
                set_not_prev = set(not_prev_corrected_spans)
                not_prev_corrected_spans2 = [(m2,p2) for (m2,p2) in m1_curr_spans if (m2,p2) not in set_not_prev and (p1 + k_size in pos_group and p2 - 1 in pos_group and pos_group[p1 + k_size] != pos_group[p2 - 1]) ] 
                not_prev_corrected_spans += not_prev_corrected_spans2

                if not_prev_corrected_spans: # p1 + k_size not in read_previously_considered_positions:
                    tmp_cnt, read_complexity_cnt = find_most_supported_span(r_id, m1, p1, not_prev_corrected_spans, minimizer_combinations_database, reads, all_intervals, k_size, tmp_cnt, read_complexity_cnt, quality_values_database, already_computed)


            # from pympler import asizeof
            # print("reads", asizeof.asizeof(reads)/1000000)
            # print("not_prev_corrected_spans", asizeof.asizeof(not_prev_corrected_spans)/1000000)
            # # print("other_reads_corrected_regions", asizeof.asizeof(other_reads_corrected_regions)/1000000)
            # print("previously_corrected_regions", asizeof.asizeof(previously_corrected_regions)/1000000)
            # print("all_intervals", asizeof.asizeof(all_intervals)/1000000)
            # print("read_min_comb", asizeof.asizeof(read_min_comb)/1000000)
            # print("quality_values_database", asizeof.asizeof(quality_values_database)/1000000)
            # print("already_computed", asizeof.asizeof(already_computed)/1000000)   
            # print("minimizer_database", asizeof.asizeof(minimizer_database)/1000000)            
            # print("minimizer_combinations_database", asizeof.asizeof(minimizer_combinations_database)/1000000)
            # sleep(100)
            # sys.exit()

            # sys.exit()
            if args.verbose:
                print("{0} edlib invoked due to repeated anchors for this read.".format(read_complexity_cnt))
                print(tmp_cnt, "total computed editdist.")
                eprint("Correcting read", r_id)

            # add prev_visited_intervals to intervals to consider
            # all_intervals.extend(prev_visited_intervals)

            if previously_corrected_regions[r_id]: # add previously corrected regions in to the solver
                all_intervals.extend(previously_corrected_regions[r_id])
                del previously_corrected_regions[r_id]

            if not all_intervals:
                # eprint("Found nothing to correct")
                corrected_seq = seq
            else:
                all_intervals.sort(key = lambda x: x[1])
                # print([www for (_, _,  www, _)  in all_intervals])
                opt_indicies = solve_WIS(all_intervals) # solve Weighted Interval Scheduling here to find set of best non overlapping intervals to correct over
                # print(opt_indicies)
                # assert opt_indicies == opt_indicies2
                # print(opt_indicies)
                intervals_to_correct = get_intervals_to_correct(opt_indicies[::-1], all_intervals)
                del all_intervals
                all_intervals = []
                corrected_seq, other_reads_corrected_regions = correct_read(seq, reads, intervals_to_correct, k_size, work_dir, v_depth_ratio_threshold, max_seqs_to_spoa, args.disable_numpy, args.verbose)
                del intervals_to_correct
                for other_r_id, corrected_regions in other_reads_corrected_regions.items():
                    for corr_region in corrected_regions:
                        previously_corrected_regions[other_r_id].append(corr_region)

            # from pympler import asizeof
            # print("reads", asizeof.asizeof(reads)/1000000)
            # # print("read_min_comb", asizeof.asizeof(read_min_comb)/1000000)
            # print("not_prev_corrected_spans", asizeof.asizeof(not_prev_corrected_spans)/1000000)
            # print("other_reads_corrected_regions", asizeof.asizeof(other_reads_corrected_regions)/1000000)
            # print("previously_corrected_regions", asizeof.asizeof(previously_corrected_regions)/1000000)
            # print("all_intervals", asizeof.asizeof(all_intervals)/1000000)
            # print("read_min_comb", asizeof.asizeof(read_min_comb)/1000000)
            # print("quality_values_database", asizeof.asizeof(quality_values_database)/1000000)
            # print("already_computed", asizeof.asizeof(already_computed)/1000000)   
            # print("minimizer_database", asizeof.asizeof(minimizer_database)/1000000)            
            # print("minimizer_combinations_database", asizeof.asizeof(minimizer_combinations_database)/1000000)

            corrected_reads[r_id] = (acc, corrected_seq, "+"*len(corrected_seq))
            if args.verbose:
                print("@{0}\n{1}\n+\n{2}".format(acc, corrected_seq, "+"*len(corrected_seq) ))
                eprint("{0},{1}".format(r_id,corrected_seq))
        print()
        print("Done with batch_id:", batch_id)
        print("Took {0} seconds.".format(time()- batch_start_time))
                # eval_sim2(corrected_seq, seq, qual, tot_errors_before, tot_errors_after)
                # if r_id == 10:
                #     sys.exit()

    # eprint("tot_before:", tot_errors_before)
    # eprint("tot_after:", sum(tot_errors_after.values()), tot_errors_after)
    eprint( len(corrected_reads))
    outfile = open(os.path.join(args.outfolder, "corrected_reads.fastq"), "w")

    for r_id, (acc, seq, qual) in corrected_reads.items():
        outfile.write("@{0}\n{1}\n+\n{2}\n".format(acc, seq, qual))
    outfile.close()

    print("removing temporary workdir")
    shutil.rmtree(work_dir)



if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="De novo error correction of long-read transcriptome reads", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s 0.0.4')

    parser.add_argument('--fastq', type=str,  default=False, help='Path to input fastq file with reads')
    # parser.add_argument('--t', dest="nr_cores", type=int, default=8, help='Number of cores allocated for clustering')

    parser.add_argument('--k', type=int, default=9, help='Kmer size')
    parser.add_argument('--w', type=int, default=10, help='Window size')
    parser.add_argument('--xmin', type=int, default=14, help='Upper interval length')
    parser.add_argument('--xmax', type=int, default=80, help='Lower interval length')
    parser.add_argument('--T', type=float, default=0.1, help='Minimum fraction keeping substitution')
    # parser.add_argument('--C', type=float, default=0.05, help='Minimum fraction of keeping alternative refernece contexts')
    parser.add_argument('--exact', action="store_true", help='Get exact solution for WIS for evary read (recalculating weights for each read (much slower but slightly more accuracy,\
                                                                 not to be used for clusters with over ~500 reads)')
    parser.add_argument('--disable_numpy', action="store_true", help='Do not require numpy to be installed, but this version is about 1.5x slower than with numpy.')

    parser.add_argument('--max_seqs_to_spoa', type=int, default=200,  help='Maximum number of seqs to spoa')
    parser.add_argument('--max_seqs', type=int, default=1000,  help='Maximum number of seqs to correct at a time (in case of large clusters).')
    
    parser.add_argument('--exact_instance_limit', type=int, default=0,  help='Activates slower exact mode for instance smaller than this limit')
    # parser.add_argument('--w_equal_k_limit', type=int, default=0,  help='Sets w=k which is slower and more memory consuming but more accurate and useful for smalled clusters.')
    parser.add_argument('--set_w_dynamically', action="store_true", help='Set w = k + max(2*k, floor(cluster_size/1000)).')
    parser.add_argument('--verbose', action="store_true", help='Print various developer stats.')

    parser.add_argument('--compression', action="store_true", help='Use homopolymenr compressed reads. (Deprecated, because we will have fewer \
                                                                        minmimizer combinations to span regions in homopolymenr dense regions. Solution \
                                                                        could be to adjust upper interval legnth dynamically to guarantee a certain number of spanning intervals.')
    parser.add_argument('--outfolder', type=str,  default=None, help='A fasta file with transcripts that are shared between samples and have perfect illumina support.')
    # parser.add_argument('--pickled_subreads', type=str, help='Path to an already parsed subreads file in pickle format')
    # parser.set_defaults(which='main')
    args = parser.parse_args()


    if args.xmin < 2*args.k:
        args.xmin = 2*args.k
        eprint("xmin set to {0}".format(args.xmin))

    if len(sys.argv)==1:
        parser.print_help()
        sys.exit()
    if not args.fastq and not args.flnc and not  args.ccs:
        parser.print_help()
        sys.exit()




    if args.outfolder and not os.path.exists(args.outfolder):
        os.makedirs(args.outfolder)


    # edlib_module = 'edlib'
    # parasail_module = 'parasail'
    # if edlib_module not in sys.modules:
    #     print('You have not imported the {0} module. Only performing clustering with mapping, i.e., no alignment.'.format(edlib_module))
    # if parasail_module not in sys.modules:
    #     eprint('You have not imported the {0} module. Only performing clustering with mapping, i.e., no alignment!'.format(parasail_module))
    #     sys.exit(1)
    if 100 < args.w or args.w < args.k:
        eprint('Please specify a window of size larger or equal to k, and smaller than 100.')
        sys.exit(1)

    main(args)

