# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks/gdrive_activity_tracking.ipynb (unless otherwise specified).

# Public API of this module: the names exported by a star-import.
__all__ = ['truncated', 'getOneOf', 'getTimeInfo', 'getActionInfo', 'getUserInfo', 'getActorInfo', 'getTargetInfo',
           'initialize_data', 'get_authors', 'convert_dates', 'get_drive_activity', 'SCOPES', 'identify_actor',
           'check_data', 'clean_data', 'get_data']

# Cell
from nbdev.showdoc import *
import jovsatools
import fastcore
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from collections import defaultdict
import pandas as pd
import numpy as np
import datetime
import seaborn as sns

# Cell
# Returns a bracketed, comma-separated string representation of the elements in a list.
def truncated(array, limit=None):
    """Format the items of an iterable as a bracketed, comma-separated string.

    Args:
        array: Iterable of items. It is materialised into a list first, so
            one-shot iterators such as ``map`` objects are accepted. Items
            are converted with ``str`` so non-string items do not break the
            join.
        limit: Maximum number of items to show. ``None`` (the default) shows
            every item, which is what this function has always done. When
            items are left out, a trailing ``...`` entry marks the omission.
            Negative values are treated as 0.

    Returns:
        A string such as ``'[a, b]'`` or ``'[a, b, ...]'``.
    """
    items = [str(item) for item in array]
    if limit is not None:
        limit = max(limit, 0)
        if len(items) > limit:
            # Keep the first `limit` items and flag that more were dropped.
            items = items[:limit] + ['...']
    return u'[{0}]'.format(', '.join(items))


# Returns the name of a set property in an object, or else "unknown".
def getOneOf(obj):
    """Return the first key produced by iterating ``obj``.

    Falls back to the string ``'unknown'`` when ``obj`` yields nothing.
    """
    return next(iter(obj), 'unknown')


# Returns a time associated with an activity.
def getTimeInfo(activity):
    """Return the time recorded on an activity.

    Prefers ``activity['timestamp']``; otherwise uses the ``endTime`` of
    ``activity['timeRange']``; otherwise returns the string ``'unknown'``.
    """
    if 'timestamp' in activity:
        when = activity['timestamp']
    elif 'timeRange' in activity:
        when = activity['timeRange']['endTime']
    else:
        when = 'unknown'
    return when


# Returns the type of action.
def getActionInfo(actionDetail):
    """Return the action type, i.e. the first key of ``actionDetail``.

    Delegates to ``getOneOf``, so an empty detail yields ``'unknown'``.
    """
    action_type = getOneOf(actionDetail)
    return action_type


# Returns user information, or the type of user if not a known user.
def getUserInfo(user):
    """Return the person name of a known user, or else the kind of user.

    Args:
        user: Mapping describing a user.

    Returns:
        ``user['knownUser']['personName']`` when a ``knownUser`` entry is
        present; otherwise whatever ``getOneOf(user)`` returns (the first key
        of ``user``, or ``'unknown'`` when it is empty).

    Raises:
        KeyError: If ``knownUser`` is present but has no ``personName``.
    """
    if 'knownUser' in user:
        return user['knownUser']['personName']
    return getOneOf(user)


# Returns actor information, or the type of actor if not a user.
def getActorInfo(actor):
    """Describe an actor: user information when it is a user, else its kind.

    When ``actor`` has a ``user`` entry the result comes from
    ``getUserInfo``; otherwise it is the first key of ``actor`` as returned
    by ``getOneOf``.
    """
    if 'user' not in actor:
        return getOneOf(actor)
    return getUserInfo(actor['user'])


# Returns the type of a target and an associated title.
def getTargetInfo(target):
    """Return a ``kind:"title"`` label for an activity target.

    ``driveItem`` is checked first, then ``drive``, then ``fileComment``
    (whose title is read from its ``parent``). A missing title is rendered
    as ``unknown``. Any other target yields ``<first key>:unknown``.
    """
    if 'driveItem' in target:
        label = 'driveItem:"{0}"'
        title = target['driveItem'].get('title', 'unknown')
    elif 'drive' in target:
        label = 'drive:"{0}"'
        title = target['drive'].get('title', 'unknown')
    elif 'fileComment' in target:
        label = 'fileComment:"{0}"'
        # A comment has no title of its own; use the commented file's title.
        title = target['fileComment'].get('parent', {}).get('title', 'unknown')
    else:
        return '{0}:unknown'.format(getOneOf(target))
    return label.format(title)

def initialize_data(feature_list):
    """Return a dict mapping each feature name to its own new empty list."""
    return {name: [] for name in feature_list}

def get_authors(df, actor_column):
    """Collect the distinct actors found in a column of actor collections.

    Every cell of ``df[actor_column]`` is iterated and its elements are
    gathered into a single set.
    """
    return {name for cell in df[actor_column].values for name in cell}

def convert_dates(x):
    """Parse a ``Z``-suffixed RFC 3339 timestamp string into a ``datetime``.

    Accepts ``YYYY-MM-DDTHH:MM:SSZ`` with or without fractional seconds.
    Fractions longer than six digits (e.g. nanosecond precision) are
    truncated to microseconds, the finest resolution ``datetime`` can hold.

    Args:
        x: Timestamp string.

    Returns:
        A timezone-naive ``datetime.datetime`` with the parsed wall-clock
        value.

    Raises:
        ValueError: If ``x`` matches neither supported format. An explicit
            exception is used instead of ``assert`` so the check still runs
            under ``python -O``.
    """
    text = x
    if isinstance(x, str) and x.endswith('Z') and '.' in x:
        head, _, fraction = x[:-1].partition('.')
        if len(fraction) > 6 and fraction.isdigit():
            # %f accepts at most six digits; drop the sub-microsecond part.
            text = '{0}.{1}Z'.format(head, fraction[:6])

    for fmt in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            continue
    raise ValueError('unrecognised timestamp format: {0!r}'.format(x))

# Cell

# OAuth scopes passed to the authorization flow in get_drive_activity.
# If modifying these scopes, delete the file token.pickle; otherwise the
# credentials cached in that file keep being reused.
SCOPES = ['https://www.googleapis.com/auth/drive.activity.readonly']


def get_drive_activity(page_size, loc, debug=False):
    """Query the Drive Activity API and return the activities as a DataFrame.

    Credentials are read from ``token.pickle`` in the current working
    directory when that file exists. If they are missing or not valid they
    are refreshed or, failing that, obtained through a local-server OAuth
    flow that reads ``credentials.json``; the resulting credentials are then
    written back to ``token.pickle``.

    A single query is issued, so only the activities contained in that one
    response are returned.

    Args:
        page_size: Value sent as ``pageSize`` in the query body.
        loc: Identifier that is formatted into ``items/<loc>`` and sent as
            ``ancestorName``.
        debug: When true, additionally print one summary line per activity
            (or ``No activity.`` when there is none).

    Returns:
        A ``pandas.DataFrame`` with the columns ``time``, ``action``,
        ``actor`` and ``target`` and one row per activity. ``actor`` and
        ``target`` cells hold lists.
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    # NOTE(security): unpickling can execute arbitrary code, so token.pickle
    # must only ever be a file that this function wrote itself.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('driveactivity', 'v2', credentials=creds)

    # Call the Drive Activity API
    results = service.activity().query(body={
        'pageSize': page_size,
        'ancestorName': 'items/{}'.format(loc)
    }).execute()
    activities = results.get('activities', [])

    if debug:
        print('Recent activity:' if activities else 'No activity.')

    data = initialize_data(['time', 'action', 'actor', 'target'])
    # Parse every activity exactly once; the debug line reuses the same values
    # that are stored in the table.
    for activity in activities:
        when = getTimeInfo(activity)
        action = getActionInfo(activity['primaryActionDetail'])
        actors = list(map(getActorInfo, activity['actors']))
        targets = list(map(getTargetInfo, activity['targets']))

        if debug:
            print(u'{0}: {1}, {2}, {3}'.format(when,
                                               truncated(actors),
                                               action,
                                               truncated(targets)))

        data['time'].append(when)
        data['action'].append(action)
        data['actor'].append(actors)
        data['target'].append(targets)
    return pd.DataFrame.from_dict(data)

# Cell

def identify_actor(actor, actor_names):
    """Map a one-element actor list to a display name.

    The single entry is stripped of surrounding whitespace and the text
    after its last ``/`` is looked up in ``actor_names``.

    Returns the mapped name, ``"UNOWN ACTOR"`` when the id is not in
    ``actor_names``, or ``"TOO MANY ACTORS"`` when the list does not hold
    exactly one entry (this includes an empty list).
    """
    assert isinstance(actor, list)
    if len(actor) != 1:
        return "TOO MANY ACTORS"
    actor_id = actor[0].strip().rpartition("/")[2]
    # NOTE(review): "UNOWN ACTOR" looks like a typo for "UNKNOWN ACTOR"; it is
    # kept verbatim because callers may compare against this exact value.
    return actor_names.get(actor_id, "UNOWN ACTOR")

def check_data(data, checks, actor_names):
    """Sanity-check a cleaned activity table using assertions.

    Args:
        data: Table indexable by column name, providing ``year`` and
            ``month`` and, when ``actor_names`` is given, ``actor_name``.
        checks: Non-empty mapping with the keys ``min_year`` and
            ``min_month``.
        actor_names: Mapping whose values are the expected actor names. May
            be empty or ``None``; the actor check is then skipped, mirroring
            ``clean_data``, which only creates ``actor_name`` when names are
            supplied.

    Raises:
        AssertionError: If ``checks`` is empty, if the set of names in
            ``actor_name`` differs from the set of ``actor_names`` values,
            or if ``min_year`` / ``min_month`` do not occur in the data.
    """
    assert checks
    if actor_names:
        assert set(data['actor_name']) == set(actor_names.values()), "unknown actors"
    assert checks['min_year'] in set(data['year']), "missing some old year data"
    assert checks['min_month'] in set(data['month']), "missing some old month data"


def clean_data(data, actor_names):
    """Add parsed-time columns and, optionally, actor names to ``data``.

    The frame is modified in place and also returned.

    Args:
        data: DataFrame with a ``time`` column of timestamp strings accepted
            by ``convert_dates`` and, when ``actor_names`` is given, an
            ``actor`` column of lists.
        actor_names: Mapping handed to ``identify_actor``. When empty or
            ``None`` no ``actor_name`` column is added.

    Returns:
        ``data`` with the added columns ``time_cleaned_UTC``, ``hour``,
        ``day``, ``day_of_week``, ``day_name``, ``week``, ``month``,
        ``year`` and, if requested, ``actor_name``.
    """
    # time
    time_cleaned = 'time_cleaned_UTC'
    # Work column-wise and force a datetime dtype: row-wise DataFrame.apply
    # does not reliably return a Series for a frame without rows, which would
    # break the `.dt` accessors below.
    data[time_cleaned] = pd.to_datetime(data['time'].map(convert_dates))
    stamps = data[time_cleaned].dt
    data['hour'] = stamps.hour
    data['day'] = stamps.day
    data['day_of_week'] = stamps.dayofweek
    data['day_name'] = stamps.day_name()
    try:
        # Series.dt.week was deprecated in pandas 1.1 and later removed.
        week = stamps.isocalendar().week
    except AttributeError:
        # pandas < 1.1 has no isocalendar(); fall back to the old accessor.
        week = stamps.week
    # Keep the plain integer dtype that `.dt.week` used to produce.
    data['week'] = week.astype('int64')
    data['month'] = stamps.month
    data['year'] = stamps.year

    # actor
    if actor_names:
        data['actor_name'] = [identify_actor(actor, actor_names) for actor in data['actor']]
    return data


def get_data(page_size, loc, actor_names, checks):
    """Fetch, clean and validate Drive activity in one call.

    Chains ``get_drive_activity`` -> ``clean_data`` -> ``check_data`` and
    returns the cleaned table once the checks have passed.
    """
    cleaned = clean_data(get_drive_activity(page_size, loc), actor_names)

    # run checks
    check_data(cleaned, checks, actor_names)

    return cleaned