#!/bin/bash

# Turn on command tracing when DIB_DEBUG_TRACE is a positive integer
# (defaults to 0, i.e. no tracing, when the variable is unset or empty).
if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then
    set -x
fi
# Strict mode: abort on a failing command, on expansion of an unset
# variable, and when any stage of a pipeline fails.
set -eu
set -o pipefail

# Staging directory for the Spark files, located under the target root.
# TARGET_ROOT must be set by the caller; with `set -u` an unset value
# aborts the script here.
tmp_dir=$TARGET_ROOT/tmp/spark
# Quoted so a path containing whitespace or glob characters is passed to
# mkdir as a single argument.
mkdir -p "$tmp_dir"

# Work out SPARK_DOWNLOAD_URL unless the caller already supplied one
# (i.e. the user is not providing a custom Spark distribution package).
if [ -z "${SPARK_DOWNLOAD_URL:-}" ]; then
    # Pick the Hadoop flavour of the Spark binary package from the CDH
    # version, unless SPARK_HADOOP_DL was set explicitly by the caller.
    # INFO on hadoop versions: http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html
    if [ -z "${SPARK_HADOOP_DL:-}" ]; then
        case "${DIB_CDH_VERSION:-}" in
            5.5|5.11)
                SPARK_HADOOP_DL=hadoop2.7
            ;;
            *)
                # Expand with a default: under `set -u` a bare
                # $DIB_CDH_VERSION would abort with "unbound variable"
                # before these hints are printed when the variable is unset.
                echo "WARNING: Cloudera CDH ${DIB_CDH_VERSION:-<unset>} not supported." >&2
                echo "WARNING: use the SPARK_DOWNLOAD_URL variable to install a custom Spark version." >&2
                exit 1
            ;;
        esac
    fi

    # DIB_SPARK_VERSION is required on this path; with `set -u` an unset
    # value aborts the script here.
    SPARK_DOWNLOAD_URL="http://archive.apache.org/dist/spark/spark-$DIB_SPARK_VERSION/spark-$DIB_SPARK_VERSION-bin-$SPARK_HADOOP_DL.tgz"
fi

# Fetch the Spark package, copy it into the staging directory and record
# the URL it came from. Every expansion is quoted so URLs or paths with
# whitespace or glob characters (e.g. '?' or '*' in a query string) reach
# the commands as single, unmodified arguments.
echo "Downloading SPARK"
# The package file name is the last path component of the URL.
spark_file=$(basename "$SPARK_DOWNLOAD_URL")
cached_tar="$DIB_IMAGE_CACHE/$spark_file"
# NOTE(review): cache-url is an external helper not visible here;
# presumably it downloads the URL to the given cache path unless a
# current copy is already there -- confirm against the helper.
"$TMP_HOOKS_PATH/bin/cache-url" "$SPARK_DOWNLOAD_URL" "$cached_tar"
# tmp_dir is expected to be an existing directory, so install copies the
# package into it under its original file name, owned by root, mode 0755.
sudo install -D -g root -o root -m 0755 "$cached_tar" "$tmp_dir"
# Record the source URL next to the package.
printf '%s\n' "$SPARK_DOWNLOAD_URL" > "$tmp_dir/spark_url.txt"
