#!/usr/bin/env python2.7

from __future__ import absolute_import, division, print_function
import argparse
from collections import defaultdict
try:
    from collections import OrderedDict
except ImportError:
    from ordereddict import OrderedDict
import datetime
import subprocess
import sys
import time

import nfldb

import nflgame
import nflgame.live


# Read by `bulk_insert_game_data` to decide how many games to queue up
# before flushing the accumulated rows to the database.
_batch_size = 5
"""
The number of games to batch before sending data to the database
server.

You might want to set this to a higher number (e.g., 150) if you're
rebuilding the database from scratch. A higher number requires more
memory (where `psycopg2` will fail with a mysterious and seemingly
unrelated error message), so we set it very low for the common case.
"""


def log(*args, **kwargs):
    """
    Prints `args` to standard error and flushes the stream right away.

    Any keyword arguments understood by `print` (e.g., `end`) may be
    given, except that `file` is always forced to `sys.stderr`.
    """
    options = dict(kwargs, file=sys.stderr)
    print(*args, **options)
    sys.stderr.flush()


def now():
    """
    Returns the current date and time as reported by
    `datetime.datetime.now` (called without a timezone).
    """
    current = datetime.datetime.now()
    return current


def seconds_delta(d):
    """
    Returns the length of the `datetime.timedelta` object `d` in
    seconds, including the fractional part.

    This mirrors `datetime.timedelta.total_seconds` from the standard
    library and is spelled out here for Python 2.6 compatibility.
    """
    whole_seconds = d.days * 24 * 3600 + d.seconds
    total_microseconds = whole_seconds * 10**6 + d.microseconds
    return total_microseconds / 10**6


def game_from_id(cursor, gsis_id):
    """
    Builds an `nfldb.Game` object for the game with GSIS identifier
    `gsis_id`.

    When the game starts less than 15 minutes from now (or has already
    started), the game is loaded through `nflgame.game.Game`. If the
    game is further away, or if nflgame has no game data for it yet,
    the result is built from nflgame's schedule data instead.
    """
    sched_entry = nflgame.sched.games[gsis_id]
    kickoff = nfldb.types._nflgame_start_time(sched_entry)
    seconds_until_kickoff = seconds_delta(kickoff - nfldb.now())

    if seconds_until_kickoff < 900:
        game = nflgame.game.Game(gsis_id)
        if game is not None:
            return nfldb.Game._from_nflgame(cursor.connection, game)

    # Either the game isn't close to starting yet (so we bail quickly
    # without touching the game data), or nflgame gave us nothing.
    # I guess the pregame hasn't started yet?
    return game_from_schedule(cursor, gsis_id)


def game_from_schedule(cursor, gsis_id):
    """
    Builds an `nfldb.Game` object purely from the entry for `gsis_id`
    in `nflgame.sched`.

    Use this when initializing a `nflgame.game.Game` object should be
    avoided.
    """
    sched_entry = nflgame.sched.games[gsis_id]
    conn = cursor.connection
    return nfldb.Game._from_schedule(conn, sched_entry)


def update_season_state(cursor):
    """
    Writes the current season type, year and week, as reported by
    `nflgame.live`, to the `meta` table.

    If refreshing nflgame's notion of the current week fails, the
    failure (including its cause) is logged and the `meta` table is
    left untouched.
    """
    phase_map = nfldb.types.Enums._nflgame_season_phase

    try:
        nflgame.live._update_week_number()
    except Exception as e:
        # Don't let a bad download kill the update script. Only
        # `Exception` is caught (rather than a bare `except`) so that
        # Ctrl-C and `sys.exit` are not silently swallowed here.
        log('FAILED! (%r)' % (e,))
        return
    typ = phase_map[nflgame.live._cur_season_phase]
    cursor.execute('''
        UPDATE meta SET season_type = %s, season_year = %s, week = %s
    ''', (typ, nflgame.live._cur_year, nflgame.live._cur_week))


def run_cmd(*cmd):
    """
    Runs the command given by `cmd` (program followed by its arguments)
    with this process's stdout and stderr.

    A non-zero exit status or a failure to launch the command is logged
    rather than raised.
    """
    try:
        subprocess.check_call(cmd, stdout=sys.stdout, stderr=sys.stderr)
    except subprocess.CalledProcessError as err:
        command = ' '.join(err.cmd)
        log('`%s` failed (exit status %d)' % (command, err.returncode))
    except OSError as err:
        details = (' '.join(cmd), err.errno, err.strerror)
        log('`%s` failed [Errno %d]: %s' % details)


def update_players(cursor, interval):
    """
    Refreshes the `player` table from nflgame's player JSON database.

    The refresh is skipped when fewer than `interval` seconds have
    passed since the last roster download recorded in the `meta`
    table. The interval is ignored when the `player` table is empty,
    in which case a refresh is always attempted.

    The program exits with status 1 if the `player` table is still
    empty after the refresh.
    """
    conn = cursor.connection
    cursor.execute('SELECT last_roster_download FROM meta')
    last = cursor.fetchone()['last_roster_download']
    update_due = seconds_delta(nfldb.now() - last) >= interval
    num_existing = nfldb.db._num_rows(cursor, 'player')

    # A populated table that was refreshed recently needs no work. An
    # empty table is always refreshed, no matter what the interval says.
    if num_existing > 0 and not update_due:
        return

    log('Updating player JSON database... (last update was %s)' % last)
    run_cmd(sys.executable, '-m', 'nflgame.update_players', '--no-block')
    log('done.')

    # Rebuild nflgame's in-memory player data from the JSON database.
    nflgame.players = nflgame.player._create_players()

    log('Locking player table...')
    cursor.execute('''
        LOCK TABLE player IN SHARE ROW EXCLUSIVE MODE
    ''')

    log('Updating %d players... ' % len(nflgame.players), end='')
    if num_existing != 0:
        # Slow path: insert or update each player row individually.
        for player in nflgame.players.itervalues():
            rows = nfldb.Player._from_nflgame_player(conn, player)._rows
            for table, _, vals in rows:
                # NOTE(review): the first value is used as the key here
                # while other upserts in this file pass `prim` -- confirm
                # that this is intentional.
                nfldb.db._upsert(cursor, table, vals, [vals[0]])
    else:
        # Fast path the initial insert: group rows by table, then bulk
        # insert each group.
        pending = OrderedDict()
        for player in nflgame.players.itervalues():
            rows = nfldb.Player._from_nflgame_player(conn, player)._rows
            for table, _, vals in rows:
                pending.setdefault(table, []).append(vals)
        for table in pending:
            nfldb.db._big_insert(cursor, table, pending[table])
    log('done.')

    # An empty player table at this point means something is very wrong.
    # The user MUST fix things before going forward.
    if nfldb.db._num_rows(cursor, 'player') == 0:
        log('Something is very wrong. The player table is empty even after\n'
            'trying to update it. Please seek help. Include the output of\n'
            'this program when asking for help.')
        log('The likely cause here is that the `nflgame-update-players`\n'
            'program is failing somehow. Try running it separately to see\n'
            'if it succeeds on its own.')
        sys.exit(1)

    # Finally, record that we've just updated the rosters.
    cursor.execute('UPDATE meta SET last_roster_download = NOW()')


def bulk_insert_game_data(cursor, scheduled):
    """
    Given a list of GSIS identifiers of games that have **only**
    schedule data in the database, perform a bulk insert of all drives
    and plays in the game.

    Rows are queued per table and sent to the database every
    `_batch_size` games, plus once more at the end for any leftovers.
    Game rows themselves are upserted one at a time.
    """
    # A `defaultdict` is required here: `do` looks at every table on each
    # flush, and a table may have nothing queued (e.g., a game that is
    # about to start but has no drives yet, or an empty `scheduled`
    # list). A plain dict would raise `KeyError` in that case.
    bulk = defaultdict(list)

    def do():
        log('\tSending batch of data to database.')
        for table in ('drive', 'play', 'play_player'):  # order matters
            if len(bulk[table]) > 0:
                nfldb.db._big_insert(cursor, table, bulk[table])
                bulk[table] = []

    queued = 0
    for gsis_id in scheduled:
        if queued >= _batch_size:
            do()
            queued = 0
        g = game_from_id(cursor, gsis_id)

        # This updates the schedule data to include all game meta data.
        # We don't use _save here, as that would recursively upsert all
        # drive/play data in the game.
        for table, prim, vals in g._rows:
            nfldb.db._upsert(cursor, table, vals, prim)

        queued += 1
        for drive in g.drives:
            for table, _, vals in drive._rows:
                bulk[table].append(vals)
            for play in drive.plays:
                for table, _, vals in play._rows:
                    bulk[table].append(vals)
                for pp in play.play_players:
                    for table, _, vals in pp._rows:
                        bulk[table].append(vals)
                    # Whoops. Shouldn't happen often...
                    # Only inserts into the DB if the player wasn't found
                    # in the JSON database. A few weird corner cases...
                    pp.player._save(cursor)

    # Bulk insert leftovers.
    do()


def games_in_progress(cursor):
    """
    Finds the games that are currently in progress: games that are not
    marked as finished but have at least one drive in the database.

    Returns their GSIS identifiers as a list sorted by the numeric
    value of the identifier, which is meant to correspond to the order
    in which the games are played.
    """
    cursor.execute('''
        SELECT DISTINCT game.gsis_id, game.finished
        FROM drive
        LEFT JOIN game
        ON drive.gsis_id = game.gsis_id
        WHERE game.finished = False AND drive.drive_id IS NOT NULL
    ''')
    gsis_ids = [row['gsis_id'] for row in cursor.fetchall()]
    gsis_ids.sort(key=int)
    return gsis_ids


def games_scheduled(cursor):
    """
    Finds the games that have a row in the `game` table but no drives
    in the database, and that start less than 15 minutes from now (or
    whose start time has already passed). In the typical case, these
    are games that are about to begin.

    Returns their GSIS identifiers as a list sorted by the numeric
    value of the identifier, which is meant to correspond to the order
    in which the games are played.
    """
    cursor.execute('''
        SELECT DISTINCT game.gsis_id, game.start_time
        FROM game
        LEFT JOIN drive
        ON game.gsis_id = drive.gsis_id
        WHERE drive.drive_id IS NULL
    ''')

    # The start time filter avoids needlessly processing games that only
    # have schedule data and aren't even close to starting yet: a game
    # without drives has nothing to bulk insert.
    #
    # Games are picked up starting 15 minutes before game time. Once the
    # game begins, its first bits of drive/play data get bulk inserted.
    # On the following database update, the game shows up in
    # `games_in_progress` instead and is updated incrementally.
    #
    # Bulk inserting still pays off when updates are infrequent (like
    # the initial load of the database or say, once a week).
    starting_soon = [
        row['gsis_id'] for row in cursor.fetchall()
        if seconds_delta(row['start_time'] - nfldb.now()) < 900
    ]
    return sorted(starting_soon, key=int)


def games_missing(cursor):
    """
    Finds the games listed in `nflgame.sched.games` that have no row
    in the `game` table at all.

    Returns their GSIS identifiers as a list sorted by the numeric
    value of the identifier, which is meant to correspond to the order
    in which the games are played.
    """
    cursor.execute('SELECT gsis_id FROM game')
    known = set(row['gsis_id'] for row in cursor.fetchall())
    missing = [gid for gid in nflgame.sched.games if gid not in known]
    missing.sort(key=int)
    return missing


def update_game_schedules(db):
    """
    Refreshes nflgame's schedule data and then upserts the game row of
    every game in `nflgame.sched.games`, all within one transaction
    that holds the write locks taken by `lock_tables`.
    """
    update_nflgame_schedules()
    log('Updating all game schedules... ', end='')
    with nfldb.Tx(db) as cursor:
        lock_tables(cursor)
        for gsis_id in nflgame.sched.games:
            game = game_from_id(cursor, gsis_id)
            for row in game._rows:
                table, prim, vals = row
                nfldb.db._upsert(cursor, table, vals, prim)
    log('done.')


def update_current_week_schedule(db):
    """
    Refreshes nflgame's schedule data and then upserts the game row of
    every scheduled game whose year, week and season phase match the
    values returned by `nfldb.current(db)`.
    """
    update_nflgame_schedules()

    phase_map = nfldb.types.Enums._nflgame_season_phase
    phase, year, week = nfldb.current(db)
    log('Updating schedule for (%s, %d, %d)' % (phase, year, week))
    with nfldb.Tx(db) as cursor:
        for gsis_id, info in nflgame.sched.games.iteritems():
            in_current_week = (
                year == info['year']
                and week == info['week']
                and phase == phase_map[info['season_type']]
            )
            if not in_current_week:
                continue
            game = game_from_id(cursor, gsis_id)
            for row in game._rows:
                table, prim, vals = row
                nfldb.db._upsert(cursor, table, vals, prim)
    log('done.')


def update_nflgame_schedules():
    """
    Runs the `nflgame.update_sched` module with the current Python
    interpreter, logging before and after.
    """
    log('Updating schedule JSON database...')
    updater = (sys.executable, '-m', 'nflgame.update_sched')
    run_cmd(*updater)
    log('done.')


def update(db, player_interval=(60 * 60 * 12)):
    """
    Performs one monolithic update of players, games, drives and
    plays. Once `update` returns, the database should be completely up
    to date with all current NFL data known by `nflgame`.

    `player_interval` is the minimum number of seconds between player
    meta data refreshes (see `update_players`).

    While `update` is executing, all writes to the following tables
    are blocked: player, game, drive, play, play_player. The huge lock
    exists so that no races are introduced while updating the
    database. Other clients will still be able to read from the
    database.
    """
    # This function is complicated for one reason: performance. On the
    # one hand, infrequent updates should be quick, which calls for
    # bulk-inserting game, drive and play data. On the other hand,
    # incremental updates must be supported while games are played.
    #
    # Games and their data are therefore split up into three chunks.
    #
    # Chunk one: games that don't exist in the database at all. Their
    # *schedule* data is bulk-inserted as a place holder in the `game`
    # table, which leaves all of the `home_*` and `away_*` fields set to
    # 0. No `nflgame.game.Game` object is ever initialized for this,
    # since that can be costly.
    #
    # Chunk two: games that have schedule data in the database but
    # nothing else. In the real world, these are games in the current
    # season that haven't started yet. Or more usefully, every game when
    # the database is empty. Drives and plays for this chunk are
    # bulk-inserted.
    #
    # Chunk three: games that are being played. These have the slowest
    # update procedure since each drive and play needs to be "upserted."
    # That is, inserted if it doesn't exist or updated if it does. On
    # the scale of a few games, performance should be reasonable. (Data
    # needs to be updated because mistakes can be made on the fly and
    # corrected by the NFL. Blech.)
    #
    # Updating players is pretty simple by comparison. Player meta data
    # changes infrequently, so it can be updated on a larger interval
    # with less care about performance.

    with nfldb.Tx(db) as cursor:
        # Players go first so that they don't need to be upserted later.
        update_players(cursor, player_interval)

    with nfldb.Tx(db) as cursor:
        lock_tables(cursor)

        log('Updating season phase, year and week... ', end='')
        update_season_state(cursor)
        log('done.')

        missing = games_missing(cursor)
        if missing:
            log('Adding schedule data for %d games... ' % len(missing),
                end='')
            rows_by_table = OrderedDict()
            for gid in missing:
                placeholder = game_from_schedule(cursor, gid)
                for table, _, vals in placeholder._rows:
                    rows_by_table.setdefault(table, []).append(vals)
            for table in rows_by_table:
                nfldb.db._big_insert(cursor, table, rows_by_table[table])
            log('done.')

        scheduled = games_scheduled(cursor)
        if scheduled:
            log('Bulk inserting data for %d games...' % len(scheduled))
            bulk_insert_game_data(cursor, scheduled)
            log('done.')

        playing = games_in_progress(cursor)
        if playing:
            log('Updating %d games in progress...' % len(playing))
            for gid in playing:
                live_game = game_from_id(cursor, gid)
                log('\t%s' % live_game)
                live_game._save(cursor)
            log('done.')

        # The schedule refresh *must* be the very last step because it
        # could set the 'finished' flag to true on a game that hasn't
        # been completely updated yet.
        #
        # See issue #42.
        update_current_week_schedule(db)


def lock_tables(cursor):
    """
    Takes a SHARE ROW EXCLUSIVE lock on each of the player, game,
    drive, play and play_player tables using `cursor`, logging before
    and after.
    """
    statements = '''
        LOCK TABLE player IN SHARE ROW EXCLUSIVE MODE;
        LOCK TABLE game IN SHARE ROW EXCLUSIVE MODE;
        LOCK TABLE drive IN SHARE ROW EXCLUSIVE MODE;
        LOCK TABLE play IN SHARE ROW EXCLUSIVE MODE;
        LOCK TABLE play_player IN SHARE ROW EXCLUSIVE MODE
    '''
    log('Locking write access to tables... ', end='')
    cursor.execute(statements)
    log('done.')


if __name__ == '__main__':
    # Command line entry point: run a single update, or keep updating
    # forever every `--interval` seconds.
    parser = argparse.ArgumentParser(
        description='Updates the nfldb database. It may be run at any '
                    'frequency, or it may be run continuously with '
                    '--interval.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    aa = parser.add_argument
    aa('--interval', type=int, default=None,
       help='When set, nfldb-update will check for active games and update '
            'the database every N seconds, where N is the interval given. '
            'You should NOT specify an interval smaller than 15 seconds, '
            'since NFL.com\'s JSON feed is updated approximately every '
            '15 seconds.')
    aa('--player-interval', type=int, default=(60 * 60 * 12),
       help='The number of seconds between player meta data updates. A longer '
            'interval is needed since meta data does not change frequently '
            'and because each update requires a few dozen HTTP requests to '
            'NFL.com.')
    aa('--update-schedules', action='store_true',
       help='When set, ALL game schedules are refreshed from the data in '
            'nflgame. (In normal operation, only the current week\'s schedule '
            'is refreshed.)')
    args = parser.parse_args()

    def doit():
        """
        Performs one complete update cycle: connect, update and
        disconnect.
        """
        log('-' * 79)
        log('STARTING NFLDB UPDATE AT %s' % now())

        log('Connecting to nfldb... ', end='')
        db = nfldb.connect()
        log('done.')

        # The connection is closed even when the update fails, so that a
        # failed run never leaves a connection dangling.
        try:
            # We always insert dates and times as UTC.
            log('Setting timezone to UTC... ', end='')
            nfldb.set_timezone(db, 'UTC')
            log('done.')

            if args.update_schedules:
                update_game_schedules(db)
            else:
                update(db, player_interval=args.player_interval)
        finally:
            log('Closing database connection... ', end='')
            db.close()
            log('done.')

        log('FINISHED NFLDB UPDATE AT %s' % now())
        log('-' * 79)

    if args.interval is None:
        doit()
    else:
        if args.interval < 15:
            log('WARNING: Interval %d is shorter than 15 seconds and is '
                'probably wasteful.\nAre you sure you know what you are doing?'
                % args.interval)
        while True:
            doit()
            time.sleep(args.interval)
