#!/usr/bin/env python

import fire
from databricks_tooling.artifact_store import AzureDataLakeClient
from databricks_tooling import orchestrator
import os
import sys

# todo - add execute job
class Orca:
    """Command-line helper for uploading task files and managing jobs.

    Task files are uploaded through ``AzureDataLakeClient`` and jobs are
    created/executed through ``databricks_tooling.orchestrator``. Connection
    settings come either from explicit arguments or from a ``config`` module
    importable from the current working directory (see ``read_config``).
    """

    def __init__(self):
        """Create an instance with no settings loaded yet.

        Settings are stored on the instance by ``create_job`` and
        ``read_config`` when those methods run.
        """

    def create_job(
            self, job_name, file_name, file_path, run_type, version_id,
            max_concurrent_runs, timeout_seconds, schedule,
            config=True, cluster_id=None, storage_account_name=None,
            storage_account_key=None, host=None, headers=None, filesystem_name=None,
            user_name=None, email_notifications=None, repo_name=None, project=None
    ):
        """Upload a task file and create a single-task job that runs it.

        Args:
            job_name: Name of the job; also used as the task key.
            file_name: Name of the task file to upload.
            file_path: Local directory of the task file (``'./'`` for the
                current directory).
            run_type: Path component of the upload directory.
            version_id: Path component of the upload directory.
            max_concurrent_runs: Passed to ``orchestrator.create_job``.
            timeout_seconds: Job-level timeout passed to
                ``orchestrator.create_job``.
            schedule: Quartz cron expression for the job schedule.
            config: When true, the connection settings below are replaced by
                the values from the ``config`` module.
            cluster_id: Existing cluster the task runs on.
            storage_account_name: Storage account for the upload.
            storage_account_key: Key for the storage account.
            host: Passed to ``orchestrator.create_job``.
            headers: Passed to ``orchestrator.create_job``.
            filesystem_name: File system used by ``AzureDataLakeClient``.
            user_name: User that is made owner (``IS_OWNER``) of the job.
            email_notifications: Passed to ``orchestrator.create_job``.
            repo_name: First path component of the upload directory.
            project: Mount name used in the returned ``dbfs:/mnt/`` path.

        Prints the uploaded task file path and the id of the created job.
        """
        self.max_concurrent_runs = max_concurrent_runs
        self.job_name = job_name
        self.file_name = file_name
        self.file_path = file_path
        self.run_type = run_type
        self.version_id = version_id
        self.cluster_id = cluster_id
        self.storage_account_name = storage_account_name
        self.storage_account_key = storage_account_key
        self.host = host
        self.headers = headers
        self.filesystem_name = filesystem_name
        self.user_name = user_name
        self.email_notifications = email_notifications
        self.timeout_seconds = timeout_seconds
        self.repo_name = repo_name
        self.project = project
        if config:
            # Values from the config module replace the ones assigned above.
            self.read_config()

        # config=False: the settings were already resolved above, so the
        # config module must not be read a second time.
        task_file_path = self.upload_task(
            self.file_name,
            storage_account_name=self.storage_account_name,
            storage_account_key=self.storage_account_key,
            run_type=self.run_type,
            version_id=self.version_id,
            repo_name=self.repo_name,
            file_path=self.file_path,
            filesystem_name=self.filesystem_name,
            project=self.project,
            config=False
        )

        tasks = [
            {
                "task_key": self.job_name,
                "description": "",
                "depends_on": [],
                "existing_cluster_id": self.cluster_id,
                "spark_python_task": {
                    "python_file": task_file_path,
                    "parameters": []
                },
                # NOTE(review): the task-level timeout is a fixed value and is
                # independent of the timeout_seconds argument - confirm intent.
                "timeout_seconds": 1000,
                "max_retries": 3,
                "min_retry_interval_millis": 10000,
                # NOTE(review): sent as the string "true", not a boolean -
                # confirm this is what the orchestrator/API expects.
                "retry_on_timeout": "true",
            }
        ]
        # Must stay empty; per the original author job creation does not work
        # otherwise.
        job_clusters = []
        schedule_spec = {
            # https://www.freeformatter.com/cron-expression-generator-quartz.html
            "quartz_cron_expression": f"{schedule}",
            # https://garygregory.wordpress.com/2013/06/18/what-are-the-java-timezone-ids/
            "timezone_id": "America/New_York",
            "pause_status": "UNPAUSED"
        }
        # The alternative format is "MULTI_TASK".
        job_format = "SINGLE_TASK"
        access_control_list = [
            {"user_name": self.user_name,
             "permission_level": "IS_OWNER"}
        ]
        job = orchestrator.create_job(
            self.job_name,
            tasks,
            job_clusters,
            self.email_notifications,
            # Use the caller's timeout; it used to be overwritten with 30.
            self.timeout_seconds,
            schedule_spec,
            self.max_concurrent_runs,
            job_format,
            access_control_list,
            self.host,
            self.headers
        )

        print(f"Task File Path: {task_file_path}")
        print(f"Job ID: {int(job.json()['job_id'])}")

    def upload_task(
            self,
            file_name,
            run_type,
            storage_account_name=None,
            storage_account_key=None,
            version_id=None,
            repo_name=None,
            file_path=None,
            filesystem_name=None,
            project=None,
            config=True,
    ):
        """Upload a local task file and return its ``dbfs:/mnt/`` path.

        The file is uploaded to ``{repo_name}/{run_type}/{version_id}/``,
        followed by ``file_path`` unless that is the current directory.

        Args:
            file_name: Name of the file to upload.
            run_type: Path component of the upload directory.
            storage_account_name: Storage account for the upload.
            storage_account_key: Key for the storage account.
            version_id: Path component of the upload directory.
            repo_name: First path component of the upload directory.
            file_path: Local directory containing the file; ``None``, ``''``
                and ``'./'`` all mean the current directory. A missing
                trailing slash is added.
            filesystem_name: File system used by ``AzureDataLakeClient``.
            project: Mount name used in the returned path.
            config: When true, storage settings, ``repo_name`` and ``project``
                are taken from the ``config`` module instead of the
                arguments.

        Returns:
            ``dbfs:/mnt/{project}/{directory}{file_name}``.
        """
        if config:
            self.read_config()

        storage_account_name = self._setting(
            "storage_account_name", storage_account_name, config)
        storage_account_key = self._setting(
            "storage_account_key", storage_account_key, config)
        repo_name = self._setting("repo_name", repo_name, config)
        filesystem_name = self._setting("filesystem_name", filesystem_name, config)
        project = self._setting("project", project, config)

        datalake_client = AzureDataLakeClient(
            storage_account_name,
            filesystem_name,
            storage_account_key=storage_account_key
        )
        local_dir = self._with_trailing_slash(file_path)
        directory = self._remote_directory(
            repo_name, run_type, version_id, file_path)
        datalake_client.create_directory(directory)
        datalake_client.upload_file(local_dir + file_name, directory + file_name)
        return f"dbfs:/mnt/{project}/{directory}{file_name}"

    def execute_job(
            self, job_id, config=True, host=None, headers=None,
            jar_params=None, notebook_params=None, python_params=None,
            spark_submit_params=None
    ):
        """Trigger a run of an existing job through the orchestrator.

        Args:
            job_id: Id of the job to run.
            config: When true, ``host`` and ``headers`` are taken from the
                ``config`` module instead of the arguments.
            host: Passed to ``orchestrator.execute_job``.
            headers: Passed to ``orchestrator.execute_job``.
            jar_params: Passed through unchanged.
            notebook_params: Passed through unchanged.
            python_params: Passed through unchanged.
            spark_submit_params: Passed through unchanged.
        """
        if config:
            self.read_config()
        # NOTE(review): the four *_params values default to None and are
        # forwarded as-is; confirm orchestrator.execute_job accepts None.
        orchestrator.execute_job(
            job_id,
            self._setting("host", host, config),
            self._setting("headers", headers, config),
            jar_params,
            notebook_params,
            python_params,
            spark_submit_params
        )

    def read_config(self):
        """Load settings from a ``config`` module and store them on the instance.

        The current working directory is appended to ``sys.path`` so that a
        ``config.py`` located there can be imported. The module must define
        every name imported below; otherwise the import raises.
        """
        working_dir = os.getcwd()
        # Avoid piling up duplicate entries when this runs more than once.
        if working_dir not in sys.path:
            sys.path.append(working_dir)
        from config import (
            headers, host,
            storage_account_name,
            storage_account_key,
            cluster_id, filesystem_name,
            user_name, email_notifications,
            repo_name, project
        )
        self.headers = headers
        self.host = host
        self.storage_account_name = storage_account_name
        self.storage_account_key = storage_account_key
        self.cluster_id = cluster_id
        self.filesystem_name = filesystem_name
        self.user_name = user_name
        self.email_notifications = email_notifications
        self.repo_name = repo_name
        self.project = project

    def _setting(self, name, explicit, use_config):
        """Pick the effective value for the setting called ``name``.

        With ``use_config`` the value stored on the instance (filled by
        ``read_config``) is used. Otherwise the explicit argument is used,
        falling back to the stored value when the argument is ``None``.
        A setting that was never stored counts as ``None``.
        """
        stored = getattr(self, name, None)
        if use_config or explicit is None:
            return stored
        return explicit

    @staticmethod
    def _with_trailing_slash(file_path):
        """Return ``file_path`` as a directory prefix ending in ``/``.

        ``None`` and the empty string are treated as ``'./'``.
        """
        if not file_path:
            return './'
        file_path = str(file_path)
        if file_path.endswith('/'):
            return file_path
        return file_path + '/'

    @staticmethod
    def _remote_directory(repo_name, run_type, version_id, file_path):
        """Build the upload directory (with trailing slash) for a task file."""
        base = f"{repo_name}/{run_type}/{version_id}/"
        relative = Orca._with_trailing_slash(file_path)
        if relative == './':
            return base
        return base + relative


if __name__ == '__main__':
    # Hand a fresh Orca instance to Fire so it can be driven from the
    # command line.
    fire.Fire(Orca())

