# AUTOGENERATED! DO NOT EDIT! File to edit: 23_multi_edit_distances.ipynb (unless otherwise specified).

__all__ = ['get_optimal_distance', 'get_levenshtein_distance', 'get_hamming_distance']

# Cell
def get_optimal_distance(s1,s2, match = 0, mismatch = -1, gap = -1):
    """Computes the optimal matching distance between two sequences using the [Needleman-Wunsch algorithm](https://www.sciencedirect.com/science/article/abs/pii/0022283670900574?via%3Dihub) based on Devon Ryan's implementation found [here](https://www.biostars.org/p/231391/).

    `s1` and `s2` are the sequences to compare (anything that supports `len` and iteration, e.g. strings or lists).
    `match`, `mismatch` and `gap` are the scores added for an equal pair of elements, an unequal pair, and an
    insertion/deletion respectively; costs are expressed as negative scores.

    Returns the negated optimal alignment score (the bottom-right cell of the alignment matrix), which is a
    non-negative distance for the default penalties. The integer `0` is returned when the score is zero.
    """
    # Imported here because this module has no top-level numpy import;
    # without it every call would fail with a NameError on `np`.
    import numpy as np

    n_cols = len(s1) + 1  # one column per element of s1, plus the empty prefix
    n_rows = len(s2) + 1  # one row per element of s2, plus the empty prefix
    al_mat = np.zeros((n_rows, n_cols), dtype=float)  # alignment score matrix

    # Aligning a prefix against the empty sequence costs one gap per element.
    for i in range(n_rows):
        al_mat[i, 0] = gap * i
    for j in range(n_cols):
        al_mat[0, j] = gap * j

    # Each cell keeps the best of the three possible moves. Only the final
    # score is returned, so no traceback/pointer matrix is maintained.
    for i, item2 in enumerate(s2, start=1):
        for j, item1 in enumerate(s1, start=1):
            di = al_mat[i - 1, j - 1] + (match if item1 == item2 else mismatch)  # diagonal: match/mismatch
            ho = al_mat[i, j - 1] + gap  # horizontal: gap, coming from the left cell
            ve = al_mat[i - 1, j] + gap  # vertical: gap, coming from the upper cell
            al_mat[i, j] = max(di, ho, ve)

    # Optimal alignment score = bottom-right value of the alignment matrix.
    score = al_mat[n_rows - 1, n_cols - 1]
    if score == 0:  # avoids returning -0.0 when negating a zero score
        return 0

    return -score

# Cell
def get_levenshtein_distance(s1,s2):
    "Computes the [Levenshtein II distance](https://journals.sagepub.com/doi/abs/10.1177/0049124110362526) between two sequences, i.e. the optimal distance when only insertions and deletions are allowed. It delegates to `get_optimal_distance` with a gap cost of -1 and a mismatch cost of ~infinity (-9999999) so that substitutions are effectively never chosen. For the Levenshtein I distance, call `get_optimal_distance` with its default parameters."
    # Substitutions are priced out of reach; indels keep their unit cost.
    indel_only_penalties = dict(match=0, mismatch=-9999999, gap=-1)
    return get_optimal_distance(s1, s2, **indel_only_penalties)

# Cell
def get_hamming_distance(s1,s2):
    """Computes the Hamming distance between two sequences, which is the optimal distance using only substitutions (no indels).

    This delegates to `get_optimal_distance` with a mismatch cost of -1 and a gap cost of ~infinity (-999999),
    so gaps are effectively never chosen. Because of that gap cost it can only be used on sequences of the
    same length.

    Raises `ValueError` when `s1` and `s2` differ in length.
    """
    if len(s1) != len(s2):
        # ValueError is a subclass of Exception, so callers catching Exception are unaffected.
        raise ValueError('sequences provided are not equal length - cannot compute Hamming distance')

    return get_optimal_distance(s1,s2, match=0, mismatch=-1, gap=-999999)