#! /usr/bin/env python3
"""Use UPFP to parse a FASTA into a CSV file."""
import os
import argparse
import pandas as pd
from upfp import parse_fasta, chunker

# Command-line interface: two positional paths followed by optional flags.
parser = argparse.ArgumentParser()

# positional: where to read the FASTA from
parser.add_argument(
    'fasta_filepath',
    help='path to the FASTA file.',
    type=str,
)

# positional: where to write the resulting CSV
parser.add_argument(
    'csv_filepath',
    help='path where to store the CSV file.',
    type=str,
)

# optional switch: present means True, absent means False
parser.add_argument(
    '-g',
    '--gzipped',
    default=False,
    action='store_true',
    help='flag to indicate whether the FASTA is gzipped. Defaults to False.',
)

# optional integer: value forwarded to the chunker by the script body
parser.add_argument(
    '-c',
    '--chunk_size',
    default=10000,
    type=int,
    help=(
        'size of the chunks used when writing '
        'the CSV file. Defaults to 10000.'
    ),
)

def write_chunks_to_csv(chunks, csv_filepath):
    """Write an iterable of record chunks to a single CSV file.

    Each chunk is converted to a ``pandas.DataFrame``. The first chunk
    creates (or truncates) the file and emits the header row; every
    following chunk is appended without a header and without the index.

    Args:
        chunks: iterable of chunks; each chunk must be accepted by the
            ``pandas.DataFrame`` constructor.
        csv_filepath: path of the CSV file to write.

    Returns:
        The number of chunks written. When it is 0 the file is neither
        created nor modified.
    """
    n_written = 0
    # a plain loop (instead of ``next(chunks)`` followed by a loop) copes
    # with an empty input and with any iterable, not only iterators
    for chunk in chunks:
        is_first = n_written == 0
        pd.DataFrame(chunk).to_csv(
            csv_filepath,
            header=is_first,
            index=False,
            mode='w' if is_first else 'a',
        )
        n_written += 1
    return n_written


if __name__ == '__main__':
    # parse arguments
    args = parser.parse_args()
    # ensure removal of existing csv file, so that a stale file never
    # survives a run that ends up writing nothing
    if os.path.exists(args.csv_filepath):
        os.remove(args.csv_filepath)
    # run the parser
    sequences = parse_fasta(
        args.fasta_filepath,
        args.gzipped
    )
    # write the csv in chunks
    chunks = chunker(sequences, chunk_size=args.chunk_size)
    if write_chunks_to_csv(chunks, args.csv_filepath) == 0:
        # nothing was produced: fail with a readable message rather than
        # an uncaught StopIteration traceback
        raise SystemExit(
            'no records were produced from {!r}; CSV file not written.'
            .format(args.fasta_filepath)
        )
