# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/core.ipynb (unless otherwise specified).

__all__ = ['TSFeatures']

# Cell
import os
import requests
import json
from requests.auth import HTTPBasicAuth
from typing import Optional

import boto3
import pandas as pd

# Cell
class TSFeatures:
    """Compute time series features at scale.
    Send an email to fede.garza.ramirez@gmail.com to request access.

    Parameters
    ----------
    api_id: str
        API identifier. Used to build the invoke URL of the service.
    api_key: str
        API key. Sent in the `x-api-key` header of every request.
    aws_access_key_id: Optional[str]
        AWS ACCESS KEY ID. Only required by `upload_to_s3` and
        `download_from_s3`.
    aws_secret_access_key: Optional[str]
        AWS SECRET ACCESS KEY. Only required by `upload_to_s3` and
        `download_from_s3`.
    """

    def __init__(self, api_id: str, api_key: str,
                 aws_access_key_id: Optional[str] = None,
                 aws_secret_access_key: Optional[str] = None) -> None:
        self.invoke_url = f'https://{api_id}.execute-api.us-east-1.amazonaws.com/main'
        self.api_key = api_key
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def _from_api_to_df(self, response: str) -> pd.DataFrame:
        """Transforms api output to df.

        Parameters
        ----------
        response: str
            JSON text returned by the API.

        Returns
        -------
        pd.DataFrame
            Single-row frame (index 0) built from the decoded JSON.
        """
        return pd.DataFrame(json.loads(response), index=[0])

    def _s3_client(self, method_name: str):
        """Builds a boto3 S3 client from the stored credentials.

        Parameters
        ----------
        method_name: str
            Name of the public method asking for the client; only used
            to produce an accurate error message.

        Raises
        ------
        Exception
            If either AWS credential was not passed to the constructor.
        """
        if self.aws_access_key_id is None or self.aws_secret_access_key is None:
            raise Exception(
                f'To use `{method_name}` you need to pass '
                '`aws_access_key_id` and '
                '`aws_secret_access_key`'
            )

        return boto3.client('s3',
                            aws_access_key_id=self.aws_access_key_id,
                            aws_secret_access_key=self.aws_secret_access_key)

    @staticmethod
    def _split_s3_uri(s3_url: str) -> tuple:
        """Splits `<scheme>://<bucket>/<key>` into `(bucket, key)`.

        The key keeps every path segment after the bucket, so objects
        stored under a prefix are addressed correctly.

        Raises
        ------
        ValueError
            If `s3_url` does not contain both a bucket and an object key.
        """
        # maxsplit=3 -> ['s3:', '', bucket, key-with-slashes]
        parts = s3_url.split('/', 3)
        if len(parts) < 4 or parts[1] != '' or not parts[2] or not parts[3]:
            raise ValueError(
                f'Invalid S3 url {s3_url!r}; '
                'expected the form s3://<bucket>/<key>'
            )

        return parts[2], parts[3]

    def _post(self, endpoint: str, query: dict) -> pd.DataFrame:
        """POSTs `query` as JSON to `endpoint` and returns the response as df."""
        resp = requests.post(f'{self.invoke_url}/{endpoint}',
                             headers={'x-api-key': self.api_key},
                             data=json.dumps(query))

        return self._from_api_to_df(resp.text)

    def upload_to_s3(self, file: str, bucketname: str) -> str:
        """Uploads file to s3.

        Parameters
        ----------
        file: str
            Local file. May include directories; the object is stored
            in the bucket under the file's base name.
        bucketname: str
            S3 bucket name.

        Returns
        -------
        str
            S3 uri of the uploaded object.

        Raises
        ------
        Exception
            If the AWS credentials were not passed to the constructor.
        """
        s3 = self._s3_client('upload_to_s3')

        filename = os.path.basename(file)
        # Upload from the full local path; only the object key is the base name.
        s3.upload_file(file, bucketname, filename)

        return f's3://{bucketname}/{filename}'

    def download_from_s3(self, s3_url: str, file: Optional[str] = None) -> str:
        """Download file from s3.

        Parameters
        ----------
        s3_url: str
            S3 url of the form `s3://<bucket>/<key>`.
        file: Optional[str]
            Destination file. Defaults to the base name of the object
            key, in the current working directory.

        Returns
        -------
        str
            Local path the object was written to.

        Raises
        ------
        Exception
            If the AWS credentials were not passed to the constructor.
        ValueError
            If `s3_url` does not contain both a bucket and an object key.
        """
        s3 = self._s3_client('download_from_s3')

        bucketname, key = self._split_s3_uri(s3_url)
        destination = key.rsplit('/', 1)[-1] if file is None else file

        s3.download_file(bucketname, key, destination)

        return destination

    def _calculate_features_from_s3_uri(self, s3_uri: str, freq: int,
                                        kind: str = 'static',
                                        unique_id_column: str = 'unique_id',
                                        ds_column: str = 'ds',
                                        y_column: str = 'y') -> pd.DataFrame:
        """Calculates features from S3 URL.

        Parameters
        ----------
        s3_uri: str
            S3 uri of the dataset.
            The dataset should contain at least three columns:
                - Time series identifier.
                - Time identifier.
                - Target identifier.
        freq: int
            Frequency of the time series.
        kind: str
            Kind of features. Static returns features for each time series.
            temporal returns for each ds and each time series.
        unique_id_column: str
            Column name identifying each time series.
        ds_column: str
            Column name identifying each time stamp.
        y_column: str
            Column name identifying the target variable.

        Returns
        -------
        pd.DataFrame
            Single-row frame with the API response.

        Notes
        -----
        [1] The dataset should contain time series of the same frequency.
        """
        query = dict(url=s3_uri, freq=freq, kind=kind,
                     unique_id_column=unique_id_column,
                     ds_column=ds_column,
                     y_column=y_column)

        return self._post('tsfeatures', query)

    def calculate_temporal_features_from_s3_uri(self, s3_uri: str, freq: int,
                                                 unique_id_column: str = 'unique_id',
                                                 ds_column: str = 'ds',
                                                 y_column: str = 'y') -> pd.DataFrame:
        """Calculates temporal features from S3 URL.

        Parameters
        ----------
        s3_uri: str
            S3 uri of the dataset.
            The dataset should contain at least three columns:
                - Time series identifier.
                - Time identifier.
                - Target identifier.
        freq: int
            Frequency of the time series.
        unique_id_column: str
            Column name identifying each time series.
        ds_column: str
            Column name identifying each time stamp.
        y_column: str
            Column name identifying the target variable.

        Returns
        -------
        pd.DataFrame
            Single-row frame with the API response.

        Notes
        -----
        [1] The dataset should contain time series of the same frequency.
        """

        return self._calculate_features_from_s3_uri(s3_uri=s3_uri,
                                                    kind='temporal',
                                                    freq=freq,
                                                    unique_id_column=unique_id_column,
                                                    ds_column=ds_column,
                                                    y_column=y_column)

    def calculate_static_features_from_s3_uri(self, s3_uri: str, freq: int,
                                              unique_id_column: str = 'unique_id',
                                              ds_column: str = 'ds',
                                              y_column: str = 'y') -> pd.DataFrame:
        """Calculates static features from S3 URL.

        Parameters
        ----------
        s3_uri: str
            S3 uri of the dataset.
            The dataset should contain at least three columns:
                - Time series identifier.
                - Time identifier.
                - Target identifier.
        freq: int
            Frequency of the time series.
        unique_id_column: str
            Column name identifying each time series.
        ds_column: str
            Column name identifying each time stamp.
        y_column: str
            Column name identifying the target variable.

        Returns
        -------
        pd.DataFrame
            Single-row frame with the API response.

        Notes
        -----
        [1] The dataset should contain time series of the same frequency.
        """

        return self._calculate_features_from_s3_uri(s3_uri=s3_uri,
                                                    kind='static',
                                                    freq=freq,
                                                    unique_id_column=unique_id_column,
                                                    ds_column=ds_column,
                                                    y_column=y_column)

    def calculate_calendar_features_from_s3_uri(self, s3_uri: str, country: str,
                                                unique_id_column: str = 'unique_id',
                                                ds_column: str = 'ds',
                                                y_column: str = 'y') -> pd.DataFrame:
        """Calculates calendar features from S3 URL.

        Parameters
        ----------
        s3_uri: str
            S3 uri of the dataset.
            The dataset should contain at least three columns:
                - Time series identifier.
                - Time identifier.
                - Target identifier.
        country: str
            Country to calculate calendar features from.
        unique_id_column: str
            Column name identifying each time series.
        ds_column: str
            Column name identifying each time stamp.
        y_column: str
            Column name identifying the target variable.

        Returns
        -------
        pd.DataFrame
            Single-row frame with the API response.
        """
        query = dict(url=s3_uri, country=country,
                     unique_id_column=unique_id_column,
                     ds_column=ds_column,
                     y_column=y_column)

        return self._post('calendartsfeatures', query)

    def get_status(self, job_id: str) -> pd.DataFrame:
        """Gets job status.

        Parameters
        ----------
        job_id: str
            ID of the job to query (presumably the one reported in the
            response of the `calculate_*_features_from_s3_uri` methods).

        Returns
        -------
        pd.DataFrame
            Single-row frame with the API response.
        """
        resp = requests.get(f'{self.invoke_url}/tsfeatures/jobs/',
                            params={'job_id': job_id},
                            headers={'x-api-key': self.api_key})

        return self._from_api_to_df(resp.text)