# README body for a generated project. Intended for str.format-style rendering:
# `{project_name}` is the only placeholder, so the text must not contain any
# other curly braces.
# The workflow names in "Running locally" follow the generated packages:
# `wordcount` is the Apache Beam example and `internationalports` is the
# BigQuery example (see the test templates in this module).
readme_template = '''# Run project

### Requirements
  1. [GCP SDK](https://cloud.google.com/sdk/docs).

### Preparations
  1. Log in using SDK (`gcloud auth application-default login`).
  1. You need to create cloud composer.
  1. You need to create bucket `{project_name}` and inside this bucket create directories `beam_runner/staging` and `beam_runner/temp`.
  1. In your deployment_config change key docker_repository to point your docker registry (we recommend to use one docker repository for all environments).
  1. In bucket connected to your docker registry you need to add role Storage Object Admin pointing to Service Account that composer uses.

### Running locally
  1. To run bigquery workflow type in project directory `bf run --workflow internationalports`.
  1. To run apache beam workflow on Dataflow type in project directory `bf run --workflow wordcount`.
  
### Building & deploying
  1. You need to build docker image and DAG file. To do this type in terminal `bf build -e`.
  1. When build is completed in /image directory you can find created docker image and in .dags directory you can find
  created DAG file. To deploy your image and DAG type `bf deploy -i image/DOCKER_IMAGE_NAME` where DOCKER_IMAGE_NAME is
  name of your docker image found in /image directory.
'''

# Dockerfile body for a generated project: starts from python:3.7, copies
# ./dist into the image, installs libzbar-dev/libc-dev/musl-dev with apt-get and
# then pip-installs every wheel found in /dist. Contains no placeholders.
docker_template = (
    'FROM python:3.7\n'
    'COPY ./dist /dist\n'
    'RUN apt-get -y update && apt-get install -y libzbar-dev libc-dev musl-dev\n'
    'RUN for i in /dist/*.whl; do pip install $i; done\n'
)

# Source of a generated module that defines `deployment_config` as a bigflow
# `Config` named 'dev'. Intended for str.format-style rendering: `{project_id}`
# and `{dags_bucket}` are placeholders, while `{{` / `}}` are escaped braces that
# become the literal braces of the properties dict.
# 'docker_repository' is hard-coded to 'test_repository'.
basic_deployment_config_template = '''from bigflow.configuration import Config

deployment_config = Config(name='dev',
                           properties={{
                               'docker_repository': 'test_repository',
                               'gcp_project_id': '{project_id}',
                               'dags_bucket': '{dags_bucket}'
                           }})
'''

# Fragment that chains one more `.add_configuration(...)` call for environment
# `{env}`. It starts with a dot, so it is only valid when appended to an existing
# expression (presumably the rendered basic deployment config - confirm at the
# call site). Placeholders: `{env}`, `{project_id}`, `{dags_bucket}`;
# `{{` / `}}` are escaped literal braces.
advanced_deployment_config_template = '''.add_configuration(name='{env}',
                           properties={{
                               'gcp_project_id': '{project_id}',
                               'dags_bucket': '{dags_bucket}'
                           }})
'''
# Pinned dependency list, one requirement per line (presumably written to the
# generated project's requirements file). Contains no placeholders.
requirements_template = '\n'.join((
    'bigflow[bigquery]==1.0.dev22',
    'apache-beam==2.23.0',
    'google-apitools==0.5.31',
    'wheel==0.35.1',
    '',  # empty last item keeps the trailing newline of the rendered text
))

# Source of a generated setup script: when run as a script it calls
# setuptools.setup() with the kwargs produced by bigflow.build.project_setup /
# auto_configuration for the package name 'workflows'.
# Contains no placeholders. The literal begins with a newline because the text
# starts on the line after the opening quotes.
project_setup_template = '''
from setuptools import setup
from bigflow.build import project_setup, auto_configuration

PROJECT_NAME = 'workflows'

if __name__ == '__main__':
    setup(**project_setup(**auto_configuration(PROJECT_NAME)))
'''

# Source of the generated Beam pipeline module. `dataflow_pipeline` builds
# PipelineOptions for the DataflowRunner (project, job name, gs:// staging/temp
# locations, region, worker settings, setup.py and requirements.txt) and returns
# a `beam.Pipeline` created from them.
# The text contains f-string braces such as `{uuid.uuid4()}`, so it is not a
# str.format template.
# The line continuations in the generated imports are written as `\\`: a single
# backslash before a newline inside this (non-raw) literal would be swallowed by
# the literal itself and the generated file would end up with joined, overlong
# import lines instead of the continuation.
beam_pipeline_template = '''def dataflow_pipeline(gcp_project_id, staging_location, temp_location, region, machine_type):
    import uuid
    from pathlib import Path

    import apache_beam as beam

    from apache_beam.options.pipeline_options import SetupOptions, StandardOptions, WorkerOptions, GoogleCloudOptions, \\
        PipelineOptions

    from bigflow.resources import create_file_if_not_exists, find_file, create_setup_body, resolve, \\
        get_resource_absolute_path
    options = PipelineOptions()

    google_cloud_options = options.view_as(GoogleCloudOptions)
    google_cloud_options.project = gcp_project_id
    google_cloud_options.job_name = f'beam-wordcount-{uuid.uuid4()}'
    google_cloud_options.staging_location = f"gs://{staging_location}"
    google_cloud_options.temp_location = f"gs://{temp_location}"
    google_cloud_options.region = region

    options.view_as(WorkerOptions).machine_type = machine_type
    options.view_as(WorkerOptions).max_num_workers = 2
    options.view_as(WorkerOptions).autoscaling_algorithm = 'THROUGHPUT_BASED'
    options.view_as(StandardOptions).runner = 'DataflowRunner'

    options.view_as(SetupOptions).setup_file = resolve(
        create_file_if_not_exists(find_file('workflows', Path(__file__)).parent / 'setup.py', create_setup_body('workflows')))
    options.view_as(SetupOptions).requirements_file = resolve(get_resource_absolute_path('requirements.txt', Path(__file__)))
    return beam.Pipeline(options=options)

'''
# Source of the generated Beam processing module. `count_words` creates a fixed
# in-memory input of eight letters, maps each one to (letter, 1), sums the ones
# per key and pipes the result into the caller-supplied `target_method`
# transform. Contains no placeholders.
beam_processing_template = '''import apache_beam as beam


def count_words(p, target_method):
    return (p | beam.Create(['a', 'b', 'c', 'd', 'a', 'b', 'c', 'd'])
        | 'PairWithOne' >> beam.Map(lambda x: (x, 1))
        | 'GroupAndSum' >> beam.CombinePerKey(sum)
        | 'save' >> target_method)
'''
# Source of the generated Beam workflow module.
# - `SimpleJob` stores the Dataflow settings; its `run` builds the pipeline via
#   `dataflow_pipeline`, applies `count_words` with a WriteToText sink pointing
#   at gs://<workflow_config['bucket']>/beam_wordcount, then runs the pipeline
#   and waits for it to finish.
# - `simple_workflow` wraps a single SimpleJob under the id "test_workflow",
#   taking every setting from `workflow_config`.
# The text contains a bare `{}` (used by the generated code's own .format call),
# so it is not usable as a str.format template as-is.
beam_workflow_template = '''from apache_beam.io import WriteToText

from bigflow import Workflow

from .pipeline import dataflow_pipeline
from .config import workflow_config
from .processing import count_words


class SimpleJob(object):
    def __init__(self, id, gcp_project_id, staging_location, temp_location, region, machine_type):
        self.id = id
        self.retry_count = 20
        self.retry_pause_sec = 100

        self.gcp_project_id = gcp_project_id
        self.staging_location = staging_location
        self.temp_location = temp_location
        self.region = region
        self.machine_type = machine_type

    def run(self, runtime):
        p = dataflow_pipeline(self.gcp_project_id, self.staging_location, self.temp_location, self.region, self.machine_type)
        count_words(p, WriteToText("gs://{}/beam_wordcount".format(workflow_config['bucket'])))
        p.run().wait_until_finish()


simple_workflow = Workflow(
    workflow_id="test_workflow",
    definition=[SimpleJob(
        'test_workflow',
        gcp_project_id=workflow_config['gcp_project_id'],
        staging_location=workflow_config['staging_location'],
        temp_location=workflow_config['temp_location'],
        region=workflow_config['region'],
        machine_type=workflow_config['machine_type']
    )])
'''
# Source of the generated Beam workflow config module, defining
# `workflow_config` as a bigflow `Config` named 'dev'.
# Intended for str.format-style rendering with `{project_id}`, `{project_name}`
# and `{bucket}`; `{{` / `}}` are escaped literal braces.
# Staging/temp locations are `<project_name>/beam_runner/...`; region and
# machine type are hard-coded to 'europe-west1' and 'n1-standard-1'.
basic_beam_config_template = '''from bigflow.configuration import Config

workflow_config = Config(name='dev',
                           properties={{
                               'gcp_project_id': '{project_id}',
                               'staging_location': '{project_name}/beam_runner/staging',
                               'temp_location': '{project_name}/beam_runner/temp',
                               'region': 'europe-west1',
                               'machine_type': 'n1-standard-1',
                               'bucket': '{bucket}'
                           }})
'''

# Fragment that chains one more `.add_configuration(...)` call for environment
# `{env}` onto an existing expression (it starts with a dot).
# Placeholders: `{env}`, `{project_id}`, `{dags_bucket}`; `{{` / `}}` are escaped
# literal braces.
# NOTE(review): the text appears identical to advanced_deployment_config_template,
# including the 'dags_bucket' key, which basic_beam_config_template does not
# define - confirm this is intended and not a copy-paste leftover.
advanced_beam_config_template = '''.add_configuration(name='{env}',
                           properties={{
                               'gcp_project_id': '{project_id}',
                               'dags_bucket': '{dags_bucket}'
                           }})
'''

# Source of the generated unit test for the word-count pipeline. The test runs
# `count_words` in a Beam TestPipeline with `FakeFileSaver` as the sink and
# expects each of 'a', 'b', 'c', 'd' to be counted twice.
# - The doubled backslash below is an escaped backslash, so the generated file
#   keeps a real line continuation after `records_to_delete`.
# - `FakeFile.data` is a class-level dict, i.e. shared by every FakeFile
#   instance. NOTE(review): possibly deliberate so that rows stored by SaveFn
#   stay visible to the test - confirm before changing.
# - The text contains bare braces (dict literals), so it is not a str.format
#   template.
test_wordcount_workflow_template = '''from unittest import TestCase

from apache_beam.testing.test_pipeline import TestPipeline
import apache_beam as beam
from workflows.wordcount.processing import count_words


class WordCountWorkflowTestCase(TestCase):

    def test_should_return_result(self):
        fake_file = FakeFile()
        with TestPipeline() as p:
            count_words(p, FakeFileSaver(fake_file))
        self.assertEqual(fake_file.data, {'a': 2, 'b': 2, 'c': 2, 'd': 2})


class FakeFileSaver(beam.PTransform):
    def __init__(self, file):
        super().__init__()
        self.file = file

    def expand(self, records_to_delete):
        return records_to_delete \\
               | "save to file" >> beam.ParDo(
            SaveFn(self.file))


class SaveFn(beam.DoFn):
    def __init__(self, file):
        super().__init__()
        self.file = file

    def process(self, row, *args, **kwargs):
        self.file.data[row[0]] = row[1]

class FakeFile(object):
    data = {}

'''
test_internationalports_workflow_template = """from unittest import TestCase
from unittest.mock import patch

from google.cloud import bigquery
from bigflow.bigquery.dataset_manager import DatasetManager, TemplatedDatasetManager
from workflows.internationalports.workflow import scorer_workflow


class InternationalPortsWorkflowTestCase(TestCase):

    @patch.object(DatasetManager, 'write')
    @patch.object(DatasetManager, 'create_table')
    @patch.object(DatasetManager, 'collect')
    @patch.object(bigquery.Client, 'create_dataset')
    @patch.object(TemplatedDatasetManager, 'create_full_table_id', side_effect=lambda table: 'sc-11309-content-scorer-dev.bigflow_test' + '.' + table)
    @patch.object(DatasetManager, 'table_exists_or_error')
    def test_should_use_proper_queries(self, table_exists_or_error_mock, create_full_table_id_mock, create_dataset_mock, collect__mock, create_table_mock, write_mock):
        table_exists_or_error_mock.return_value = True
        scorer_workflow.run('2019-01-01')
        collect__mock.assert_called_with('''
        INSERT INTO `sc-11309-content-scorer-dev.bigflow_test.more_ports` (port_name, port_latitude, port_longitude, country, index_number)
        VALUES 
        ('SWINOUJSCIE', 53.916667, 14.266667, 'POL', '28820'),
        ('GDYNIA', 54.533333, 18.55, 'POL', '28740'),
        ('GDANSK', 54.35, 18.666667, 'POL', '28710'),
        ('SZCZECIN', 53.416667, 14.55, 'POL', '28823'),
        ('POLICE', 53.566667, 14.566667, 'POL', '28750'),
        ('KOLOBRZEG', 54.216667, 15.55, 'POL', '28800'),
        ('MURMANSK', 68.983333, 33.05, 'RUS', '62950'),
        ('SANKT-PETERBURG', 59.933333, 30.3, 'RUS', '28370');
        ''')

        create_table_mock.assert_any_call('''
        CREATE TABLE IF NOT EXISTS ports (
          port_name STRING,
          port_latitude FLOAT64,
          port_longitude FLOAT64)
    ''')

        create_table_mock.assert_any_call('''
    CREATE TABLE IF NOT EXISTS more_ports (
          port_name STRING,
          port_latitude FLOAT64,
          port_longitude FLOAT64,
          country STRING,
          index_number STRING)
    ''')
        write_mock.assert_called_with('sc-11309-content-scorer-dev.bigflow_test.ports', '''
        SELECT port_name, port_latitude, port_longitude
        FROM `sc-11309-content-scorer-dev.bigflow_test.more_ports`
        WHERE country = 'POL'
        ''', 'WRITE_TRUNCATE')
"""

# Source of the generated BigQuery config module, defining `dataset_config` as a
# bigflow `DatasetConfig` for env 'dev' and dataset 'bigflow_test' with the
# internal tables 'ports' and 'more_ports'.
# Intended for str.format-style rendering: `{project_id}` is the only
# placeholder and `{{}}` renders as an empty dict literal.
# The text has no trailing newline (the closing quotes follow the `)` directly).
basic_bq_config_template = '''from bigflow.bigquery import DatasetConfig

INTERNAL_TABLES = ['ports', 'more_ports']

EXTERNAL_TABLES = {{}}

dataset_config = DatasetConfig(env='dev',
                               project_id='{project_id}',
                               dataset_name='bigflow_test',
                               internal_tables=INTERNAL_TABLES,
                               external_tables=EXTERNAL_TABLES
                               )'''

advanced_bq_config_template = """.add_configuration(env='{env}',
                               project_id='{project_id}')"""

bq_processing_template = """from bigflow.bigquery import component

more_ports_insert = '''
        INSERT INTO `{more_ports}` (port_name, port_latitude, port_longitude, country, index_number)
        VALUES 
        ('SWINOUJSCIE', 53.916667, 14.266667, 'POL', '28820'),
        ('GDYNIA', 54.533333, 18.55, 'POL', '28740'),
        ('GDANSK', 54.35, 18.666667, 'POL', '28710'),
        ('SZCZECIN', 53.416667, 14.55, 'POL', '28823'),
        ('POLICE', 53.566667, 14.566667, 'POL', '28750'),
        ('KOLOBRZEG', 54.216667, 15.55, 'POL', '28800'),
        ('MURMANSK', 68.983333, 33.05, 'RUS', '62950'),
        ('SANKT-PETERBURG', 59.933333, 30.3, 'RUS', '28370');
        '''

ports_write_truncate = '''
        SELECT port_name, port_latitude, port_longitude
        FROM `{more_ports}`
        WHERE country = 'POL'
        '''


@component()
def ports(dataset):
    dataset.write_truncate('ports', ports_write_truncate, partitioned=False)


@component()
def populate_more_ports(dataset):
    dataset.collect(more_ports_insert)

"""
# Source of the generated BigQuery workflow module. It creates a dataset manager
# from `dataset_config`, turns the three components into jobs with that manager
# injected as the 'dataset' dependency, and defines `scorer_workflow`
# ('test_bigquery_workflow', schedule '@once') running them in the order
# create_tables -> populate_more_ports -> ports.
# The text contains bare braces (dict literals), so it is not a str.format
# template, and it has no trailing newline.
bq_workflow_template = '''from bigflow import Workflow
from .config import dataset_config
from .processing import ports, populate_more_ports
from .tables import create_tables

dataset = dataset_config.create_dataset_manager()


create_tables_job = create_tables.to_job(id=None, dependencies_override={'dataset': dataset})
ports_job = ports.to_job(id=None, dependencies_override={'dataset': dataset})
populate_job = populate_more_ports.to_job(id=None, dependencies_override={'dataset': dataset})

scorer_workflow = Workflow(
        workflow_id='test_bigquery_workflow',
        definition=[create_tables_job, populate_job, ports_job],
        schedule_interval='@once')'''
bq_tables_template = """from bigflow.bigquery import component

ports_table = '''
        CREATE TABLE IF NOT EXISTS ports (
          port_name STRING,
          port_latitude FLOAT64,
          port_longitude FLOAT64)
    '''

more_ports_table = '''
    CREATE TABLE IF NOT EXISTS more_ports (
          port_name STRING,
          port_latitude FLOAT64,
          port_longitude FLOAT64,
          country STRING,
          index_number STRING)
    '''


@component()
def create_tables(dataset):
    dataset.create_table(ports_table)
    dataset.create_table(more_ports_table)
"""

