#!/usr/bin/python3
# coding: utf8

"""ReSuber {} - Software toolbox to re-synchronize and/or translate SRT subtitles from a movie.

ReSuber is an automatic tool to re-synchronize any SRT subtitles, using its corresponding vocal audio WAV stream from a movie.
It uses machine learning techniques to perform language agnostic re-synchronization, by checking the signal correlation
between the vocal audio stream and the corrected subtitle signal.
ReSuber also provides different utilities, including vocal audio extraction, subtitle/video file merging into containers and
automatic translation of SRT subtitles with the Google Cloud Translation API (no API key required).

License MIT.

See https://github.com/polak0v/ReSuber for source code and documentation.
"""

import os
import glob
import shutil
import pysubs2
import argparse
import numpy as np
import resuber.utils as utils
import resuber.calculus as calculus

def _find_subtitle(directory, tag):
    """Return the path of a ``*.<tag>.srt`` file located in `directory`.

    Parameters
    ----------
        directory : path of the directory to search (taken literally, glob characters are escaped)
        tag : suffix placed between the base name and the ``.srt`` extension (e.g. ``"fr"``)

    Returns
    -------
        `str` : the first matching path, in sorted order

    Raises
    ------
        `FileNotFoundError` : when no file matches
    """
    pattern = os.path.join(glob.escape(directory), "*." + tag + ".srt")
    # sorted so that the selected file does not depend on the directory listing order
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError("No subtitle file matching '{}' was found.".format(pattern))
    return matches[0]

def run(input_dir=".", reference_language="fr", target_timestamps="eng", encoding="utf8", min_dist=10000, drop_far=False):
    """Move the subtitles of one SRT file onto the nearest start times of another SRT file.

    Every ``*.srt`` file of `input_dir` is copied into ``<input_dir>/resuber-move``. The text of the
    ``*.<reference_language>.srt`` file is then re-timed with the start times read from the
    ``*.<target_timestamps>.srt`` file, and the result is saved next to the reference copy as
    ``<reference file>.resubed.srt``.

    Parameters
    ----------
        input_dir : directory containing the SRT files
        reference_language : tag of the subtitle file whose text is kept
        target_timestamps : tag of the subtitle file whose start times are used
        encoding : encoding used to read and write the subtitle files
        min_dist : a target start time is used only if it is closer than this to the original start time
                   (same unit as the subtitle times; the command-line help states milliseconds)
        drop_far : if true, subtitles without a close enough target start time are dropped instead of
                   keeping their original timestamps

    Raises
    ------
        `FileNotFoundError` : when the reference or the target subtitle file cannot be found
    """
    # making output dir (exist_ok avoids the check-then-create race)
    output_dir = os.path.join(input_dir, "resuber-move")
    os.makedirs(output_dir, exist_ok=True)

    # copying all subtitles files into output dir
    # input_dir is escaped so that names such as "Movie [2019]" are not read as glob patterns
    for file in glob.iglob(os.path.join(glob.escape(input_dir), "*.srt")):
        if os.path.isfile(file):
            shutil.copy2(file, output_dir)

    # locating reference language (the text to be taken) and target timestamps (the timestamps to be used)
    ref_lang_sub_path = _find_subtitle(output_dir, reference_language)
    tgt_time_sub_path = _find_subtitle(output_dir, target_timestamps)
    output_sub_path = ref_lang_sub_path + ".resubed.srt"

    ref_lang_sub = pysubs2.load(ref_lang_sub_path, encoding=encoding)
    # only the third value returned by read_subs is used, as the collection of target start times
    _, _, tgt_starts, _ = calculus.signal.read_subs(tgt_time_sub_path, encoding=encoding)
    tgt_starts = np.asarray(tgt_starts)

    output_sub = pysubs2.SSAFile()
    for sub in ref_lang_sub:
        sub_start = np.int32(sub.start)
        sub_end = np.int32(sub.end)
        # without any target timestamp there is no neighbor: every subtitle counts as "far"
        is_near = False
        if tgt_starts.size > 0:
            nearest_start = tgt_starts[np.argmin(np.abs(tgt_starts - sub_start))]
            is_near = np.abs(nearest_start - sub_start) < min_dist
        # if nearest neighbor too far, current timestamp is preserved or rejected
        if is_near:
            # the original duration is kept, only the start is moved
            nearest_end = nearest_start + (sub_end - sub_start)
            output_sub.append(pysubs2.SSAEvent(start=nearest_start, end=nearest_end, text=sub.text))
        elif not drop_far:
            output_sub.append(pysubs2.SSAEvent(start=sub_start, end=sub_end, text=sub.text))

    calculus.signal.add_credits(output_sub)
    calculus.signal.save_subs(output_sub, output_sub_path, encoding=encoding)

def _parse_bool_flag(value):
    """Convert the optional value given to a boolean command-line option.

    Parameters
    ----------
        value : the raw string read from the command line

    Returns
    -------
        `bool` : the parsed value

    Raises
    ------
        `argparse.ArgumentTypeError` : when the string is not a recognized boolean spelling
    """
    normalized = str(value).strip().lower()
    if normalized in ("1", "true", "t", "yes", "y", "on"):
        return True
    if normalized in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

def get_parser():
    """Get the parser.

    Every option defaults to `None` when it is not given on the command line.
    
    Returns
    -------
        `argparse.ArgumentParser` : the argument parser
    """
    parser = argparse.ArgumentParser(description=__doc__.format(utils.args.get_version()), formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--input-dir'
                        , dest="input_dir"
                        , help="Directory containing the SRT subtitles (default: current directory).")
    parser.add_argument('--ref-lang'
                        , dest="reference_language"
                        , help="Tag of the subtitle file whose text is kept, as in *.<tag>.srt (default: fr).")
    parser.add_argument('--tgt-time'
                        , dest="target_timestamps"
                        , help="Tag of the subtitle file whose timestamps are used, as in *.<tag>.srt (default: eng).")
    parser.add_argument('--encoding'
                        , dest="encoding"
                        , help="Encoding of the subtitle files (default: utf8).")
    parser.add_argument('--min-dist'
                        , dest="min_dist"
                        , type=float
                        , help="Maximum delay (ms) to be considered as a near neighbor (default: 10000ms).")
    # usable as a bare flag (--drop-far) or with an explicit value (--drop-far false);
    # previously a value was mandatory and any non-empty string, even "False", enabled the option
    parser.add_argument('--drop-far'
                        , dest="drop_far"
                        , nargs='?'
                        , const=True
                        , default=None
                        , type=_parse_bool_flag
                        , help="Drop subtitles that are too far.")
    
    return parser

def main():
    """Command-line entry point.

    Parses the command-line arguments, passes them through `utils.args.remove_none_args`
    (presumably discarding the options left unset, so that the defaults of `run` apply - to be confirmed)
    and forwards the remaining ones to `run` as keyword arguments.
    """
    parsed = get_parser().parse_args()
    cleaned = utils.args.remove_none_args(parsed)
    keyword_args = vars(cleaned)
    run(**keyword_args)

# Run the command-line interface only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()