from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Union

from strong_typing.schema import (
    JsonObject,
    JsonType,
    Schema,
    json_schema_type,
    register_schema,
)

from .dap_error import ProcessingError

# Identifier of a job. A plain alias of `str`, used to make annotations self-describing.
JobID = str
# Identifier of an object held in object storage. Also a plain alias of `str`.
ObjectID = str


@json_schema_type(schema={"type": "string", "format": "uri", "pattern": "^https?://"})
@dataclass
class URL:
    """A Uniform Resource Locator (URL)."""

    url: str

    @staticmethod
    def from_json(value: JsonType) -> "URL":
        """
        Build a `URL` from its JSON form, which has to be a string.

        Only the JSON type is checked here; the `^https?://` pattern declared in the schema above is not
        enforced by this method.

        :raises TypeError: If `value` is not a string.
        """

        if isinstance(value, str):
            return URL(url=value)

        raise TypeError("URL must be represented as a string")

    def to_json(self) -> str:
        """Return the JSON form of this URL, which is the wrapped string itself."""

        return self.url

    def __str__(self) -> str:
        """Return the wrapped string, so that a `URL` can be formatted like a plain string."""

        return self.url


# The `examples` argument carries one sample instance: a version-1 schema whose payload describes a table
# with the columns `id`, `user_id`, `context_id` and `context_type`.
# NOTE(review): presumably emitted alongside the generated JSON schema for this type — confirm against
# the `strong_typing` documentation.
@json_schema_type(
    examples=[
        {
            "schema": {
                "type": "object",
                "properties": {
                    "id": {
                        "type": "integer",
                        "format": "int64",
                        "title": "The ID of this version object. Primary key.",
                    },
                    "user_id": {
                        "type": "integer",
                        "format": "int64",
                        "title": "The ID of the submitter. Foreign key to `users.id`.",
                    },
                    "context_id": {
                        "type": "integer",
                        "format": "int64",
                        "title": "The ID of the course this submission belongs to. Foreign key to `courses.id`.",
                    },
                    "context_type": {
                        "type": "string",
                        "enum": ["Course"],
                        "title": "The type of the context object (typically `Course`).",
                    },
                },
                "additionalProperties": False,
                "required": ["id", "context_type"],
                "title": "This table stores information describing previous versions of individual submission objects.",
            },
            "version": 1,
        }
    ]
)
@dataclass
class VersionedSchema:
    """
    The state of the schema at a specific point in time.

    Schemas are backwards compatible. They receive strictly monotonically increasing version numbers as schema
    evolution takes place.

    :param schema: The JSON Schema object to validate against.
    :param version: The version of the schema.
    """

    # `Schema` is the type imported from `strong_typing.schema` at the top of this module.
    schema: Schema
    # Plain integer; the example passed to the decorator above uses `1`.
    version: int


@json_schema_type
@dataclass
class Object:
    """
    A reference to a binary or text object persisted in object storage, such as a CSV, JSON, or Parquet file.

    The lifetime of the object depends on the operation that created it but typically lasts for 24 hours.
    Object identifiers can be traded for pre-signed URLs via an authenticated endpoint operation while the object exists.

    :param id: Uniquely identifies the object.
    """

    # `ObjectID` is a module-level alias of `str`. The name `id` shadows the builtin only inside this
    # class body; it is left as is because the field name is part of the type's interface.
    id: ObjectID


@json_schema_type
@dataclass
class Resource:
    """
    A pre-signed URL to a binary or text object persisted in object storage, such as a CSV, JSON or Parquet file.

    The lifetime of the pre-signed URL depends on the operation that created it but typically lasts for 15 minutes.
    No authentication is required to fetch the object via the pre-signed URL.

    :param url: URL to the object.
    """

    # Typed as the `URL` wrapper class declared in this module rather than as a bare `str`.
    url: URL


class JobStatus(Enum):
    """Tracks the lifetime of a job from creation to termination (with success or failure)."""

    Waiting = "waiting"
    Running = "running"
    Complete = "complete"
    Failed = "failed"

    def isTerminal(self) -> bool:
        """Tell whether the job has reached one of its two final states, 'complete' or 'failed'."""

        # Enum members are singletons, so tuple membership is equivalent to chained identity checks.
        return self in (JobStatus.Complete, JobStatus.Failed)


# Base record shared by the job types in this module. Unlike its subclasses it carries no
# `json_schema_type` decorator, but it is a member of the `Job` union passed to `register_schema`.
@dataclass
class TableJob:
    """
    A data access job in progress.

    :param id: Opaque unique identifier of the job.
    :param status: The current status of the job.
    :param expires_at: The time when the job will no longer be available.
    """

    id: JobID
    status: JobStatus
    # May be `None`, but no default value is declared, so the argument must still be passed explicitly.
    expires_at: Optional[datetime]


@json_schema_type
@dataclass
class CompleteJob(TableJob):
    """
    A data access job that has completed with success.

    :param objects: The list of objects generated by the job.
    :param schema_version: Version of the schema that records in the table conform to.
    """

    # Inherits `id`, `status` and `expires_at` from `TableJob`; as dataclass fields they precede the
    # two fields below in the generated constructor.
    objects: List[Object]
    schema_version: int


@json_schema_type
@dataclass
class CompleteSnapshotJob(CompleteJob):
    """
    A snapshot query that has completed with success.

    :param at: Timestamp (in UTC) that identifies the table state. This can be used as a starting point for future incremental queries.
    """

    # Appended after the fields inherited from `CompleteJob` (and, through it, `TableJob`).
    at: datetime


@json_schema_type
@dataclass
class CompleteIncrementalJob(CompleteJob):
    """
    An incremental query that has completed with success.

    :param since: Start timestamp (in UTC); only those records are returned that have been persisted since the specified date and time.
    :param until: End timestamp (in UTC); only those records are returned that have been persisted before the specified date and time.
    This can be used as a starting point for future incremental queries.
    """

    # Both timestamps are mandatory here, whereas `IncrementalQuery.until` in this module is optional.
    since: datetime
    until: datetime


@json_schema_type
@dataclass
class FailedJob(TableJob):
    """
    A data access job that has terminated with failure.

    :param error: Provides more details on the error that occurred.
    """

    # `ProcessingError` is imported from the sibling module `.dap_error`.
    error: ProcessingError


# Union of the job record types declared above, registered under the schema name "Job".
# `CompleteJob` itself is not listed; only its two concrete subclasses are.
Job = Union[TableJob, CompleteSnapshotJob, CompleteIncrementalJob, FailedJob]
register_schema(Job, name="Job")


@dataclass
class TableList:
    """
    A list of tables that exist in the organization domain.

    :param tables: The tables, each one given as a string.
    """

    tables: List[str]


@json_schema_type
@dataclass
class Filter:
    """
    Identifies a subset of data to fetch from a table.

    (This feature is not currently implemented.)

    Filter encompasses column-level (`select`) and row-level (`where`) filters.

    Column-level filters allow clients to include only those columns in the output they are interested in.
    Row-level filters let clients specify a condition that is applied to each record; if the condition is true,
    the record is included in the output, otherwise omitted. Row-level filters are inspired by the
    [where argument](https://hasura.io/docs/latest/queries/postgres/query-filters/#the-where-argument) in Hasura.

    Consider a table with the following columns:
    * `family_name` of type `string`,
    * `given_name` of type `string`,
    * `birth_date` of type `datetime`,
    * `height` of type `number`, representing a person's height in meters,
    * `status`, which is an enumeration that can take the values `active`, `inactive`, `pending`.

    Include only those people whose family name is `Smith`:
    ```json
    { "family_name": { "_eq": "Smith" } }
    ```

    Return people born in the 21st century:
    ```json
    { "birth_date": { "_gte": "2001-01-01" } }
    ```

    Include only those people whose height is between 1.6 and 1.7 meters:
    ```json
    { "height": { "_gt": 1.6, "_lt": 1.7 } }
    ```

    Return people whose status is `active` or `pending`:
    ```json
    { "status": { "_in": ["active", "pending"] } }
    ```

    Find people whose family name is `Smith` and given name starts with `L` or `K`:
    ```json
    { "family_name": { "_eq": "Smith" }, "given_name": { "_match": "^[LK]" } }
    ```

    Find people whose family name is `Smith` *or* whose given name starts with `L` or `K`:
    ```json
    { "_or": [
        { "family_name": { "_eq": "Smith" } },
        { "given_name": { "_match": "^[LK]" } }
    ] }
    ```

    The type of the value matched in the condition must be compatible with the type of the column.

    The following operators are generally available for all types:
    * `_eq` (equal to)
    * `_neq` (not equal to)

    Comparison operators for strings:
    * `_match` (value matches regular expression)

    Comparison operators for numbers, dates and timestamps:
    * `_gt` (greater than)
    * `_lt` (less than)
    * `_gte` (greater than or equal to)
    * `_lte` (less than or equal to)

    Comparison operators for enumerations:
    * `_in` (value in set of values)
    * `_nin` (value not in set of values)

    Operators to build composite Boolean expressions:
    * `_and`
    * `_or`
    * `_not`

    :param select: A list of column names to include in the data set returned; other columns are excluded. If omitted, return all columns.
    :param where: A filter condition to apply to records; only records that meet the condition are included. If omitted, return all records.
    """

    # Both fields accept `None`, but neither declares a default, so both must be passed to the constructor.
    select: Optional[List[str]]
    # Held as an untyped JSON object; nothing in this class validates the operator names listed above.
    where: Optional[JsonObject]


@json_schema_type
class Format(Enum):
    """
    Identifies the format of the data returned, e.g. TSV, CSV, JSON Lines, or Parquet.

    Tab-separated values (TSV) is a simple tabular format in which each record (table row) occupies a single line.

    * Output always begins with a header row, which lists all metadata and data field names.
    * Fields (table columns) are delimited by *tab* characters.
    * Non-printable characters and special values are escaped with *backslash* (`\\`).

    Comma-separated values (CSV) output follows [RFC 4180](https://www.ietf.org/rfc/rfc4180.html) with a few extensions:

    * Output always begins with a header row, which lists all metadata and data field names.
    * Strings are quoted with double quotes (`"`) if they contain special characters such as the double quote itself,
      the comma delimiter, a newline, a carriage return, a tab character, etc., or if their string representation would
      be identical to a special value such as NULL.
    * Empty strings are always represented as `""`.
    * NULL values are represented with the unquoted literal string `NULL`.
    * Missing values are presented as an empty string (no characters between delimiters).
    * Each row has the same number of fields.

    When the output data is represented in the [JSON Lines](https://jsonlines.org/) format, each record (table row)
    occupies a single line. Each line is a JSON object, which can be validated against the corresponding JSON schema.

    Parquet files are compatible with Spark version 3.0 and later.
    """

    # NOTE: the backslash in the TSV paragraph of the docstring is written as `\\` on purpose. A lone
    # backslash before a backtick is an unrecognized escape sequence, which Python reports as a
    # DeprecationWarning/SyntaxWarning. The docstring text seen at runtime still contains one backslash.

    # The member values are the lowercase strings that represent each format.
    TSV = "tsv"
    CSV = "csv"
    JSONL = "jsonl"
    Parquet = "parquet"


@dataclass
class TableQuery:
    """
    Encapsulates a query request to retrieve data from a table.

    :param format: The format of the data to be returned.
    :param filter: (Not implemented; this field must be omitted.) Any filters applied to the source, e.g. include specific columns, or omit specific rows.
    """

    format: Format
    # No default is declared, so the constructor still requires this argument although its type is `Optional`.
    # NOTE(review): given the docstring, callers presumably always pass `None` — confirm.
    filter: Optional[Filter]


@json_schema_type
@dataclass
class SnapshotQuery(TableQuery):
    """
    Snapshot queries return the present state of the table.

    Snapshot queries help populate an empty database. After the initial snapshot query, you would use incremental
    queries to get the most up-to-date version of the data.
    """

    # Declares no fields of its own; everything a snapshot request carries is inherited from `TableQuery`.


@json_schema_type
@dataclass
class IncrementalQuery(TableQuery):
    """
    Incremental queries return consolidated updates to a table, and help update a previous state to the present state.

    If only a *since* timestamp is given (recommended), the operation returns all changes since the specified point in
    time. If multiple updates took place to a record since the specified time, only the most recent version of the
    record is returned.

    If both a *since* and an *until* timestamp is given, the operation returns all records that have changed since
    the start timestamp of the interval but have not been altered after the end timestamp of the interval. Any records
    that have been updated after the *until* timestamp are not included in the query result. This functionality is
    useful to break up larger batches of changes but cannot be reliably used as a means of reconstructing a database
    state in the past (i.e. a point-in-time query or a backup of a previous state).

    The range defined by *since* and *until* is inclusive for the *since* timestamp but exclusive for the *until*
    timestamp.

    You would normally use incremental queries to fetch changes since a snapshot query or a previous incremental query.
    If issued as a follow-up to a snapshot query, the *since* timestamp of the incremental query would be equal to the
    *at* timestamp of the snapshot query. If issued as a follow-up to an incremental query, you would chain the *until*
    timestamp returned by the previous query job with the *since* timestamp of the new query request.

    :param since: Start timestamp (in UTC); only those records are returned that have been persisted since the
    specified date and time. This typically equals `at` returned by a previous snapshot query job, or `until` returned
    by a previous incremental query job.
    :param until: End timestamp (in UTC); only those records are returned that have not been changed after the
    specified date and time. If omitted (recommended), defaults to the commit time of the latest record.
    """

    since: datetime
    until: Optional[datetime]

    def __post_init__(self) -> None:
        """
        Reject timestamps that are not `datetime` objects or that carry no `tzinfo`.

        :raises TypeError: If `since`, or an `until` other than `None`, is not a `datetime`.
        :raises ValueError: If `since`, or an `until` other than `None`, has `tzinfo` set to `None`.
        """

        lower, upper = self.since, self.until

        # The lower bound is mandatory, so it is always checked.
        if not isinstance(lower, datetime):
            raise TypeError("timestamp `since` expects datetime type")
        if lower.tzinfo is None:
            raise ValueError("time zone designator required for timestamp `since`")

        # The upper bound may be left out, in which case nothing remains to be checked.
        if upper is None:
            return

        if not isinstance(upper, datetime):
            raise TypeError("timestamp `until` expects datetime type")
        if upper.tzinfo is None:
            raise ValueError("time zone designator required for timestamp `until`")


# Union of the two concrete query types declared above, registered under the schema name "Query".
# The shared base class `TableQuery` is not a member of the union.
Query = Union[SnapshotQuery, IncrementalQuery]
register_schema(Query, name="Query")


@dataclass
class ResourceResult:
    """
    Maps object identifiers to the pre-signed URLs of the corresponding output resources.

    :param urls: Key-value pairs in which each key is an ObjectID and each value is the resource URL that belongs to it.
    """

    urls: Dict[ObjectID, Resource]


@dataclass
class Credentials:
    """
    The credentials that are passed to API Gateway.

    The Instructure Identity Service manages access to credentials.

    :param api_key: API key of the institution or of its partner.
    :param client_id: Client ID, as decoded from the API key.
    :param client_region: Region of the client, as decoded from the API key.
    """

    api_key: str
    client_id: str
    client_region: str


@dataclass
class TokenProperties:
    """
    Describes an authentication/authorization token that API Gateway has issued.

    :param access_token: The access token as a base64-encoded string made up of header, payload and signature parts.
    :param expires_in: Expiry time of the access token, in seconds. Informational only, because the access token also embeds the timestamp.
    :param scope: The services that the client may access. Informational only, because the access token also embeds the scope.
    :param token_type: The type of the access token.
    """

    access_token: str
    expires_in: int
    scope: str
    token_type: str
