#!/bin/bash
# stream-download -- download, decompress, concatenate urls
# urls may be http, https, ftp, file
# intended to stream fasta sequence files from NCBI, Ensembl, and LRG

# Write a diagnostic line, prefixed with "# ", to stderr.
# Arguments: the words of the message; several arguments are joined by spaces.
log () {
    local IFS=' '   # "$*" joins on the first IFS character; pin it to a space
    printf '# %s\n' "$*" >&2
}

# Fetch one URL with curl and write its (decompressed) content to stdout.
# The handling is selected by the URL suffix:
#   *.gz   piped through `gzip -cd`, followed by one extra newline
#   *.zip  downloaded to a temporary file, then extracted with `unzip -p`
#   other  passed through unchanged
# Arguments: $1 - URL to fetch
# Outputs:   content on stdout; the URL is reported through log
# Returns:   for *.zip, the status of the first failing step (mktemp,
#            download, or extraction); the temporary file is always removed
# NOTE(review): curl is run without --fail, so an HTTP error page would be
# emitted as if it were data -- consider adding -f.
stream_decompress () {
    local url="$1"
    local fn rc=0
    log "$url"
    case "$url" in
        *.gz)
            curl -sS "$url" | gzip -cdq
            echo            # add newline between files
            ;;
        *.zip)
            fn=$(mktemp) || return
            # LRG fasta files don't have newline after last sequence in file
            # When extracting to pipe, there are lines like
            #   ACGT>defline of next file
            # Since newlines are innocuous, insert a newline before the
            # first > on each line
            #
            # Download and extraction run as one guarded list so that the
            # temporary file is removed on every path; a bare failing
            # command would end the script under `set -e` before cleanup.
            {
                curl -sS -o "$fn" "$url" &&
                    unzip -p "$fn" | sed -e 's/>/\n>/'
            } || rc=$?
            /bin/rm -f -- "$fn"
            return "$rc"
            ;;
        *)
            curl -sS "$url"
            ;;
    esac
}


############################################################################

# Abort on the first failing command, unset variable, or failing pipeline
# stage, so a broken download does not yield a silently truncated stream.
set -euo pipefail

if [ $# -eq 0 ]; then
    # From stdin: one URL per line.
    # -r keeps backslashes literal; the `|| [ -n "$url" ]` clause also
    # processes a final line that has no trailing newline. IFS is left at
    # its default so surrounding whitespace is still trimmed from each line.
    while read -r url || [ -n "$url" ]; do
        # Skip blank lines instead of handing curl an empty URL.
        [ -n "$url" ] || continue
        stream_decompress "$url"
    done
else
    # From argv: each argument is one URL.
    for url in "$@"; do
        stream_decompress "$url"
    done
fi
