# -*- coding: utf-8 -*-

import oss_tool
import os
from conf import conf

# Remote location of the data file to split, read from the project configuration.
# It is handed to oss_tool.download_file() as the source, and the part of it
# before the last '/' is reused as the prefix for the uploaded split results.
data_path = conf['data_path']

# Name of the directory that holds the split parts. Used both for the local
# output directory and for the directory name on the upload side.
split_result_dir = 'split_results'

# printf-style template for the local path of one part file; the '%s'
# placeholder is filled with the zero-based part index.
part_name = split_result_dir + '/%s.txt'

def truncateDataFile(filename):
    """Empty the file at *filename* if it already exists.

    A missing file is left missing: nothing is created in that case.

    Args:
        filename: Path of the file to empty.
    """
    if os.path.exists(filename):
        # Opening in 'w' mode already truncates the file to zero length, so no
        # explicit truncate() is needed; the context manager guarantees the
        # handle is closed even if something goes wrong.
        with open(filename, 'w'):
            pass


def _split_into_parts(source_path, part_pattern, lines_per_part=3500):
    """Split a text file into numbered part files.

    Part ``i`` (numbering starts at 0) is written to ``part_pattern % i`` and
    holds at most *lines_per_part* lines. An existing file at a part path is
    overwritten. Part 0 is always created, so an empty source still produces
    one (empty) part file.

    Args:
        source_path: Path of the text file to split.
        part_pattern: printf-style template with one placeholder for the
            part index.
        lines_per_part: Maximum number of lines per part file.

    Returns:
        The number of part files written (always at least 1).
    """
    part_count = 1
    with open(source_path, 'r') as source:
        part = open(part_pattern % 0, 'w')
        try:
            written = 0
            for line in source:
                if written >= lines_per_part:
                    # Current part is full: roll over to the next one.
                    part.close()
                    part = open(part_pattern % part_count, 'w')
                    part_count += 1
                    written = 0
                part.write(line)
                written += 1
        finally:
            # Closing an already-closed file is a no-op, so this is safe even
            # when opening the next part failed right after a roll-over.
            part.close()
    return part_count


def _remote_split_dir(remote_path, dir_name):
    """Return the remote directory for the parts: *dir_name* next to *remote_path*."""
    slash = remote_path.rfind('/')
    if slash < 0:
        # No directory component. rfind() returned -1, and slicing with it
        # would silently chop the last character off the name.
        return dir_name
    return '%s/%s' % (remote_path[:slash], dir_name)


def main():
    """Download the data file, split it into parts and upload the parts.

    Steps:
        1. Download ``data_path`` to the local file ``origin_data.txt``.
        2. Split it into part files named by ``part_name`` (created inside
           ``split_result_dir``), 3500 lines per part.
        3. Upload every part to ``<dir of data_path>/<split_result_dir>/<i>.txt``.
    """
    filename = 'origin_data.txt'

    # 1. download data file
    oss_tool.download_file(data_path, filename)

    # 2. split data file
    if not os.path.exists(split_result_dir):
        os.mkdir(split_result_dir)

    total = _split_into_parts(filename, part_name)
    print('total file: %s ' % total)

    # 3. upload data file
    remote_dir = _remote_split_dir(data_path, split_result_dir)
    print('upload to: %s/' % remote_dir)

    for i in range(total):
        local_part = part_name % i
        print('upload: %s' % local_part)
        oss_tool.upload_file(local_part, '%s/%s.txt' % (remote_dir, i))


# Run the download / split / upload pipeline only when this file is executed
# as a script, not when it is imported.
if __name__ == '__main__':
    main()


