import collections
import itertools
import traceback
import typing
from typing import List

from lime_etl.adapters import timestamp_adapter
from lime_etl.domain import (
    batch,
    batch_delta,
    exceptions,
    job_dependency_errors,
    job_result,
    job_spec,
    shared_resource,
    value_objects,
)
from lime_etl.services import (
    admin_unit_of_work,
    batch_logging_service,
    job_runner,
)


def run(
    *,
    admin_uow: admin_unit_of_work.AdminUnitOfWork,
    batch_id: value_objects.UniqueId,
    batch_name: value_objects.BatchName,
    jobs: typing.Collection[job_spec.JobSpec],
    logger: batch_logging_service.BatchLoggingService,
    resources: typing.Collection[shared_resource.SharedResource[typing.Any]],
    skip_tests: bool,
    ts_adapter: timestamp_adapter.TimestampAdapter,
) -> batch_delta.BatchDelta:
    """Run a batch of jobs and return its results next to the previous batch's.

    The job list is validated with ``check_dependencies`` first.  A batch
    record flagged as running is then saved, the jobs are executed by
    ``_run_batch``, and the finished batch is written back through
    ``admin_uow``.

    Args:
        admin_uow: Unit of work used for every read and write of batch records.
        batch_id: Id given to the batch record.
        batch_name: Name given to the batch record.
        jobs: Job specs to run, in iteration order.
        logger: Receives the start, finish, and error messages of the batch.
        resources: Shared resources handed to ``_run_batch``.
        skip_tests: Forwarded unchanged to ``_run_batch``.
        ts_adapter: Source of the start and end timestamps.

    Returns:
        A ``BatchDelta`` pairing the batch that just ran with the batch that
        ``batch_repo.get_latest()`` returned before this one was added
        (``None`` when that lookup returned nothing).

    Raises:
        exceptions.DependencyErrors: If ``check_dependencies`` reports any
            problem with ``jobs``.
        Exception: Any other error is logged, recorded as a failed batch, and
            re-raised unchanged.
    """
    start_time = ts_adapter.now()
    try:
        dep_results = check_dependencies(jobs)
        if dep_results:
            raise exceptions.DependencyErrors(dep_results)

        with admin_uow as uow:
            # Capture the latest batch *before* adding the new one, so that it
            # really is the previous batch.
            previous_results = uow.batch_repo.get_latest()
            new_batch = batch.Batch(
                id=batch_id,
                name=batch_name,
                job_results=frozenset(),
                execution_millis=None,
                execution_success_or_failure=None,
                running=value_objects.Flag(True),
                ts=start_time,
            )
            uow.batch_repo.add(new_batch.to_dto())
            uow.save()

        logger.log_info(f"Starting batch [{batch_id.value}]...")
        result = _run_batch(
            admin_uow=admin_uow,
            batch_id=batch_id,
            batch_logger=logger,
            batch_name=batch_name,
            jobs=jobs,
            resources=resources,
            skip_tests=skip_tests,
            start_time=start_time,
            ts_adapter=ts_adapter,
        )

        with admin_uow as uow:
            uow.batch_repo.update(result.to_dto())
            uow.save()

        logger.log_info(f"Batch [{batch_id.value}] finished.")
        previous_results_domain: typing.Optional[batch.Batch] = (
            previous_results.to_domain() if previous_results else None
        )
        return batch_delta.BatchDelta(
            current_results=result,
            previous_results=previous_results_domain,
        )
    except Exception as e:
        logger.log_error(str(e))
        end_time = ts_adapter.now()
        # NOTE(review): this update is also issued when the failure happened
        # before the batch record was added (e.g. a dependency error); confirm
        # that batch_repo.update copes with a record that does not exist yet.
        with admin_uow as uow:
            result = batch.Batch(
                id=batch_id,
                name=batch_name,
                job_results=frozenset(),
                execution_success_or_failure=value_objects.Result.failure(str(e)),
                execution_millis=value_objects.ExecutionMillis.calculate(
                    start_time=start_time, end_time=end_time
                ),
                running=value_objects.Flag(False),
                ts=start_time,
            )
            uow.batch_repo.update(result.to_dto())
            uow.save()
        raise


def check_dependencies(
    jobs: typing.Collection[job_spec.JobSpec], /
) -> typing.Set[job_dependency_errors.JobDependencyErrors]:
    """Report jobs whose dependencies are missing or listed too late.

    A dependency is *missing* when no job in ``jobs`` carries that name.  It
    is *out of order* when such a job exists but has not been reached yet at
    the point where the dependent job appears.  A job's own name counts as
    already seen while its dependencies are checked, so a job that depends on
    itself is not reported.

    Args:
        jobs: Job specs in the order they are going to run.

    Returns:
        One ``JobDependencyErrors`` per job that has at least one missing or
        out-of-order dependency; an empty set when there are none.
    """
    job_names = {job.job_name for job in jobs}

    # Dependencies that do not correspond to any job in the batch.
    missing_by_job: typing.Dict[
        value_objects.JobName, typing.Set[value_objects.JobName]
    ] = {}
    for job in jobs:
        missing = {dep for dep in job.dependencies if dep not in job_names}
        if missing:
            missing_by_job[job.job_name] = missing
    unresolved_dependencies = {
        dep for missing in missing_by_job.values() for dep in missing
    }

    # Dependencies that exist in the batch but come after the job needing them.
    # A set keeps each membership test O(1) instead of scanning a list.
    names_seen_so_far: typing.Set[value_objects.JobName] = set()
    out_of_order_by_job: typing.Dict[
        value_objects.JobName, typing.Set[value_objects.JobName]
    ] = {}
    for job in jobs:
        names_seen_so_far.add(job.job_name)
        out_of_order = {
            dep
            for dep in job.dependencies
            if dep not in names_seen_so_far and dep not in unresolved_dependencies
        }
        if out_of_order:
            out_of_order_by_job[job.job_name] = out_of_order

    return {
        job_dependency_errors.JobDependencyErrors(
            job_name=job_name,
            missing_dependencies=frozenset(missing_by_job.get(job_name, set())),
            jobs_out_of_order=frozenset(out_of_order_by_job.get(job_name, set())),
        )
        for job_name in missing_by_job.keys() | out_of_order_by_job.keys()
    }


def _check_for_duplicate_job_names(
    jobs: typing.Collection[job_spec.JobSpec], /
) -> None:
    job_names = [job.job_name for job in jobs]
    duplicates = {
        job_name: ct for job_name in job_names if (ct := job_names.count(job_name)) > 1
    }
    if duplicates:
        raise exceptions.DuplicateJobNamesError(duplicates)


def _check_for_missing_resources(
    jobs: typing.Collection[job_spec.JobSpec],
    resources: typing.Collection[shared_resource.SharedResource[typing.Any]],
) -> None:
    resource_names = {r.name for r in resources}
    missing_resources: typing.Mapping[
        value_objects.JobName, typing.List[value_objects.ResourceName]
    ] = collections.defaultdict(list)
    for job in jobs:
        if isinstance(job, job_spec.ETLJobSpec):
            for resource_name in job.resources_needed:
                if resource_name not in resource_names:
                    missing_resources[job.job_name].append(resource_name)

    if missing_resources:
        raise exceptions.MissingResourcesError(missing_resources)


def _is_resource_still_needed(
    remaining_jobs: typing.Collection[job_spec.JobSpec],
    resource_name: value_objects.ResourceName,
) -> bool:
    return any(
        isinstance(job, job_spec.ETLJobSpec) and resource_name in job.resources_needed
        for job in remaining_jobs
    )


def _close_unneeded_resources(
    open_managers: typing.Dict[value_objects.ResourceName, typing.Any],
    remaining_jobs: typing.Collection[job_spec.JobSpec],
) -> None:
    """Close, and stop tracking, every open resource no remaining job needs."""
    for resource_name in list(open_managers):
        still_needed = _is_resource_still_needed(
            remaining_jobs=remaining_jobs,
            resource_name=resource_name,
        )
        if not still_needed:
            open_managers.pop(resource_name).close()


def _run_batch(
    batch_id: value_objects.UniqueId,
    batch_name: value_objects.BatchName,
    batch_logger: batch_logging_service.AbstractBatchLoggingService,
    jobs: typing.Collection[job_spec.JobSpec],
    resources: typing.Collection[shared_resource.SharedResource[typing.Any]],
    admin_uow: admin_unit_of_work.AdminUnitOfWork,
    skip_tests: bool,
    start_time: value_objects.Timestamp,
    ts_adapter: timestamp_adapter.TimestampAdapter,
) -> batch.Batch:
    """Run every job in ``jobs`` in order and return the finished batch.

    For each job:

    * it is skipped when its last successful run is more recent than its
      ``seconds_between_refreshes``;
    * otherwise a job result flagged as running is saved, the resources the
      job needs are opened, and the job is executed with
      ``job_runner.default_job_runner``;
    * an exception raised by the job is logged and stored as a failed job
      result, and the batch carries on with the next job;
    * resources that no remaining job needs are closed.

    Args:
        batch_id: Id of the batch the job results belong to.
        batch_name: Name of the batch.
        batch_logger: Logger for batch-level messages; also creates the
            per-job loggers.
        jobs: Job specs to run, in iteration order.
        resources: Shared resources jobs may request by name.
        admin_uow: Unit of work used to read and write job results.
        skip_tests: Forwarded unchanged to the job runner.
        start_time: Timestamp at which the batch started.
        ts_adapter: Source of timestamps.

    Returns:
        A ``Batch`` flagged as not running and successful, holding the results
        of the jobs that were run (skipped jobs contribute no result).

    Raises:
        exceptions.MissingResourcesError: If an ETL job needs a resource that
            is not in ``resources``.
        exceptions.DuplicateJobNamesError: If two jobs share a name.
    """
    _check_for_missing_resources(jobs=jobs, resources=resources)
    _check_for_duplicate_job_names(jobs)

    job_results: List[job_result.JobResult] = []
    resource_managers = {
        resource.name: shared_resource.ResourceManager(resource)
        for resource in resources
    }
    job_resource_managers = {
        job.job_name: {
            resource_name: resource_managers[resource_name]
            for resource_name in job.resources_needed
        }
        for job in jobs
        if isinstance(job, job_spec.ETLJobSpec)
    }
    # Managers that have been opened and not closed yet, keyed by resource name.
    open_managers: typing.Dict[value_objects.ResourceName, typing.Any] = {}

    job_list = list(jobs)
    try:
        for ix, job in enumerate(job_list):
            remaining_jobs = job_list[ix + 1 :]
            current_ts = ts_adapter.now()
            with admin_uow as uow:
                last_ts = uow.job_repo.get_last_successful_ts(job.job_name)

            if last_ts:
                # Age of the last successful run of *this job*, not of the batch.
                seconds_since_last_refresh = (
                    current_ts.value - last_ts.value
                ).total_seconds()
                if seconds_since_last_refresh < job.seconds_between_refreshes.value:
                    batch_logger.log_info(
                        f"[{job.job_name.value}] was run successfully {seconds_since_last_refresh:.0f} seconds "
                        f"ago and it is set to refresh every {job.seconds_between_refreshes.value} seconds, "
                        f"so there is no need to refresh again."
                    )
                    # A resource may have been kept open only for this job;
                    # release it now that the job is not going to run.
                    _close_unneeded_resources(open_managers, remaining_jobs)
                    continue

            job_id = value_objects.UniqueId.generate()
            job_logger = batch_logger.create_job_logger()
            # NOTE(review): the job result carries the batch start time, not the
            # time this job started — confirm that is intended.
            result = job_result.JobResult(
                id=job_id,
                batch_id=batch_id,
                job_name=job.job_name,
                test_results=frozenset(),
                execution_millis=None,
                execution_success_or_failure=None,
                running=value_objects.Flag(True),
                ts=start_time,
            )
            with admin_uow as uow:
                uow.job_repo.add(result.to_dto())
                uow.save()

            batch_logger.log_info(f"Opening resources for job [{job.job_name}]...")
            job_resources: typing.Dict[value_objects.ResourceName, typing.Any] = {}
            if isinstance(job, job_spec.ETLJobSpec):
                for name, mgr in job_resource_managers[job.job_name].items():
                    job_resources[name] = mgr.open()
                    # Track only after open() succeeded.
                    open_managers[name] = mgr

            try:
                result = job_runner.default_job_runner(
                    admin_uow=admin_uow,
                    job=job,
                    logger=job_logger,
                    batch_id=batch_id,
                    job_id=job_id,
                    resources=job_resources,
                    skip_tests=skip_tests,
                    ts_adapter=ts_adapter,
                )
            except Exception:
                # NOTE(review): elapsed time is measured from the batch start,
                # so it includes the jobs that ran before this one — confirm.
                millis = ts_adapter.get_elapsed_time(start_time)
                err_msg = f"An exception occurred while running [{job.job_name}]: {traceback.format_exc(10)}."
                err = value_objects.Result.failure(err_msg)
                batch_logger.log_error(err_msg)
                result = job_result.JobResult(
                    id=job_id,
                    batch_id=batch_id,
                    job_name=job.job_name,
                    test_results=frozenset(),
                    execution_millis=millis,
                    execution_success_or_failure=err,
                    running=value_objects.Flag(False),
                    ts=result.ts,
                )
            finally:
                # Record whatever result we ended up with, success or failure.
                job_results.append(result)
                with admin_uow as uow:
                    uow.job_repo.update(result.to_dto())
                    uow.save()

            _close_unneeded_resources(open_managers, remaining_jobs)
    finally:
        # Anything still tracked here was left open by an error that escaped
        # the loop; close it so the resource does not leak.
        while open_managers:
            open_managers.popitem()[1].close()

    end_time = ts_adapter.now()

    execution_millis = int((end_time.value - start_time.value).total_seconds() * 1000)
    return batch.Batch(
        id=batch_id,
        name=batch_name,
        execution_millis=value_objects.ExecutionMillis(execution_millis),
        job_results=frozenset(job_results),
        execution_success_or_failure=value_objects.Result.success(),
        running=value_objects.Flag(False),
        ts=end_time,
    )
