"""
Realization of data operations.
"""


import abc
import collections
from typing import Any, Iterable, Set, Dict, List, Optional, Tuple, Union
import numbers
import re

import numpy

import data_algebra.expr_parse
import data_algebra.flow_text
import data_algebra.data_model
import data_algebra.expr_rep
from data_algebra.data_ops_types import MethodUse, OperatorPlatform
import data_algebra.data_ops_utils
import data_algebra.near_sql
from data_algebra.OrderedSet import (
    OrderedSet,
    ordered_intersect,
    ordered_union,
)
import data_algebra.util
from data_algebra.shift_pipe_action import ShiftPipeAction


# Optional dependency: record whether the "black" formatter could be imported.
# pretty_format_python() consults this flag before trying to use black.
_have_black = False
try:
    # noinspection PyUnresolvedReferences
    import black

    _have_black = True
except ImportError:
    # black is not installed; leave the flag False
    pass


# noinspection PyBroadException
def pretty_format_python(python_str: str, *, black_mode=None) -> str:
    """
    Re-format Python source text with black, on a best-effort basis.

    If black is not available, or black raises for any reason, the input
    text is returned unchanged.

    :param python_str: Python source code
    :param black_mode: black formatting mode, None means black.FileMode()
    :return: formatted Python source code (or the original text)
    """
    assert isinstance(python_str, str)
    if not _have_black:
        return python_str
    try:
        mode = black.FileMode() if black_mode is None else black_mode
        return black.format_str(python_str, mode=mode)
    except Exception:
        # deliberate best effort: formatting is cosmetic, never fail on it
        return python_str


def _assert_tables_defs_consistent(tm1: Dict, tm2: Dict):
    """
    Check that tables named in both maps are described compatibly.

    :param tm1: map from table key to table description
    :param tm2: map from table key to table description
    :raises ValueError: if a shared key has descriptions that do not match
    """
    shared_names = set(tm1.keys()).intersection(tm2.keys())
    for name in shared_names:
        first, second = tm1[name], tm2[name]
        if first.same_table_description_(second):
            continue
        raise ValueError("Table " + name + " has two incompatible representations")


def _convert_on_clause_to_parallel_lists(on) -> Tuple[List[str], List[str]]:
    """
    Split a join "on" specification into two parallel column-name lists.

    Accepted forms: None, a single column name, a dictionary of
    left-name -> right-name, or an iterable whose entries are either a
    column name (same name on both sides) or a (left, right) pair.

    :param on: join column specification
    :return: (left column names, right column names), lists of equal length
    :raises ValueError: if on is neither None, a string, nor an iterable
    """
    left_cols: List[str] = []
    right_cols: List[str] = []
    if on is None:
        return left_cols, right_cols
    if isinstance(on, str):
        return [on], [on]
    if not isinstance(on, Iterable):
        raise ValueError(f"unexpected on-argument type {type(on)}")
    entries = list(on.items()) if isinstance(on, Dict) else list(on)
    for entry in entries:
        if isinstance(entry, str):
            # a bare name is shared by both sides
            left_cols.append(entry)
            right_cols.append(entry)
            continue
        pair = list(entry)
        assert len(pair) == 2
        assert isinstance(pair[0], str)
        assert isinstance(pair[1], str)
        left_cols.append(pair[0])
        right_cols.append(pair[1])
    return left_cols, right_cols


def _convert_parallel_lists_to_on_clause(on_a: Iterable[str], on_b: Iterable[str]):
    """
    Combine two parallel column-name lists into a single "on" clause.

    :param on_a: list of left column names
    :param on_b: list of right column names, same length as on_a
    :return: list with one entry per position: the name itself when both
             sides agree, otherwise a (left, right) tuple
    """
    assert isinstance(on_a, List)
    assert numpy.all([isinstance(v, str) for v in on_a])
    assert isinstance(on_b, List)
    assert numpy.all([isinstance(v, str) for v in on_b])
    assert len(on_a) == len(on_b)
    return [
        left if left == right else (left, right)
        for left, right in zip(on_a, on_b)
    ]
 

def _work_col_group_arg(arg, *, arg_name: str, columns: Iterable[str]):
    """
    Convert a column-group argument to standard form.

    :param arg: None, a single column name, an iterable of distinct column
                names, or the value 1
    :param arg_name: name of the argument, used in the error message
    :param columns: known column names the argument must draw from
    :return: list of column names (empty for None), or 1 when arg is 1
    :raises ValueError: if arg is none of the accepted forms
    """
    if arg is None:
        return []
    if isinstance(arg, str):
        assert arg in set(columns)
        return [arg]
    if isinstance(arg, Iterable):
        res = list(arg)
        assert len(res) == len(set(res))
        col_set = set(columns)
        # validate the materialized list: arg itself may be a one-shot
        # iterator that list() has already exhausted
        assert all(col in col_set for col in res)
        return res
    if arg == 1:
        return 1
    # previously this was "assert ValueError(...)", which never fails
    raise ValueError(f"Need {arg_name} to be a list of strings or 1, got {arg}")


class ViewRepresentation(OperatorPlatform, abc.ABC):
    """Structure to represent the columns of a query or a table.
    Abstract base class."""


    column_names: Tuple[str, ...]
    sources: Tuple[
        "ViewRepresentation", ...
    ]  # https://www.python.org/dev/peps/pep-0484/#forward-references
    key: Optional[str]

    def __init__(
        self,
        column_names: Iterable[str],
        *,
        sources: Optional[Iterable["ViewRepresentation"]] = None,
        node_name: str,
        key: Optional[str] = None,
    ):
        """
        Record the columns, source nodes, and key of this node.

        :param column_names: non-empty collection of distinct column names (not a bare string)
        :param sources: optional iterable of ViewRepresentation source nodes
        :param node_name: node type name, passed on to OperatorPlatform
        :param key: optional string key for this node
        """
        # a bare string or a single operator would otherwise be iterated silently
        assert not isinstance(column_names, str)
        assert isinstance(node_name, str)
        assert not isinstance(sources, OperatorPlatform)
        assert isinstance(key, (str, type(None)))
        column_names = (
            column_names if isinstance(column_names, tuple) else tuple(column_names)
        )
        assert len(column_names) > 0
        assert all(isinstance(name, str) for name in column_names)
        assert len(column_names) == len(set(column_names))
        self.column_names = column_names
        if sources is None:
            sources = ()
        elif not isinstance(sources, tuple):
            sources = tuple(sources)
        assert all(isinstance(src, ViewRepresentation) for src in sources)
        self.sources = sources
        self.key = key
        OperatorPlatform.__init__(self, node_name=node_name)

    def column_map(self) -> collections.OrderedDict:
        """
        Return an ordered map from each column name to a ColumnReference for it.
        """
        return collections.OrderedDict(
            (name, data_algebra.expr_rep.ColumnReference(name))
            for name in self.column_names
        )

    def merged_rep_id(self) -> str:
        """
        Return a lookup key string built from this object's id().
        """
        return f"ops+ {id(self)}"

    # convenience

    def ex(self, *, data_model=None, allow_limited_tables: bool = False):
        """
        Evaluate the operators against the data stored in the table descriptions' head fields.

        Every table must carry a head; when more than one table is involved
        each must have a user-supplied name.

        :param data_model: adaptor to data dialect (Pandas for now)
        :param allow_limited_tables: if False, require head to hold all nrows rows
        :return: table result
        """
        assert isinstance(allow_limited_tables, bool)
        tables = self.get_tables()
        several_tables = len(tables) > 1
        data_map = {}
        for descr in tables.values():
            assert isinstance(descr, TableDescription)
            assert descr.head is not None
            if several_tables:
                assert descr.table_name_was_set_by_user
            if not allow_limited_tables:
                assert descr.nrows == descr.head.shape[0]
            data_map[descr.table_name] = descr.head
        return self.eval(data_map=data_map, data_model=data_model, strict=True)

    # characterization

    def get_tables(self):
        """
        Collect every table description reachable from this node, keyed by table key.

        :return: dictionary mapping table key to TableDescription
        :raises ValueError: if one key is used by two incompatible descriptions
        """
        found = {}
        # explicit stack instead of recursion
        pending = [self]
        while pending:
            node = pending.pop()
            if not isinstance(node, TableDescription):
                pending.extend(node.sources)
                continue
            name = node.key
            if name not in found:
                found[name] = node
            elif not node.same_table_description_(found[name]):
                raise ValueError(
                    "Table " + name + " has two incompatible representations"
                )
        return found

    @abc.abstractmethod
    def columns_used_from_sources(self, using: Optional[set] = None) -> List[str]:
        """
        Get columns used from sources. Internal method.

        Abstract: no implementation is given here.
        columns_used_implementation_() indexes the result once per entry of
        self.sources, so the result must be parallel to sources.

        :param using: optional column restriction.
        :return: list of ordered sets (list parallel to sources).
        """

    def methods_used(self) -> Set[MethodUse]:
        """
        Return the set of methods used, as gathered by get_method_uses_().
        """
        collected: Set[MethodUse] = set()
        self.get_method_uses_(collected)
        return collected

    def get_method_uses_(self, methods_seen: Set[MethodUse]) -> None:
        """
        Implementation of methods_used(), internal method.

        This base version only forwards the request to each source node.

        :param methods_seen: set to collect results in.
        :return: None
        """
        for source in self.sources:
            source.get_method_uses_(methods_seen)

    def columns_produced(self) -> List[str]:
        """Return a new list holding the column names of this node."""
        return [*self.column_names]
    
    def cod(self, *, table_name: Optional[str] = None):
        """
        Describe the operator co-domain as a table description.

        :param table_name: optional name for the described table
        :return: TableDescription over the columns this node produces
        """
        assert isinstance(table_name, (str, type(None)))
        produced = self.columns_produced()
        return TableDescription(column_names=produced, table_name=table_name)

    def columns_used_implementation_(
        self, *, using, columns_currently_using_records
    ) -> None:
        """
        Implementation of columns used calculation, internal method.

        Adds the requested columns to this node's record, then recurses into
        each source with the columns that source must supply.

        :param using: columns requested from this node, None means all of them
        :param columns_currently_using_records: map from merged_rep_id() to the
               collection of columns used so far; updated in place
        :raises ValueError: if using names a column this node does not have
        """
        rep_id = self.merged_rep_id()
        if rep_id not in columns_currently_using_records:
            columns_currently_using_records[rep_id] = OrderedSet()
        record = columns_currently_using_records[rep_id]
        if using is None:
            record.update(self.column_names)
        else:
            unknown = set(using) - set(self.column_names)
            if unknown:
                raise ValueError("asked for unknown columns: " + str(unknown))
            record.update(using)
        per_source_using = self.columns_used_from_sources(record.copy())
        for idx, source in enumerate(self.sources):
            source.columns_used_implementation_(
                using=per_source_using[idx],
                columns_currently_using_records=columns_currently_using_records,
            )

    def columns_used(self, *, using=None) -> Dict:
        """
        Determine which columns are used from each source table.

        :param using: optional restriction on the columns requested from this node
        :return: dictionary mapping table key to a copy of the columns used
        """
        tables = self.get_tables()
        records = {descr.merged_rep_id(): set() for descr in tables.values()}
        self.columns_used_implementation_(
            using=using, columns_currently_using_records=records
        )
        return {
            name: records[descr.merged_rep_id()].copy()
            for name, descr in tables.items()
        }
    
    def dom(self):
        """
        Describe the operator domain.

        Each table found by get_tables() is re-described with only its name
        (kept only if the user set it) and its column names.

        :return: map of table keys to table descriptions
        """
        described = {}
        for name, descr in self.get_tables().items():
            shown_name = name if descr.table_name_was_set_by_user else None
            described[name] = TableDescription(
                table_name=shown_name, column_names=descr.column_names
            )
        return described

    # printing

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True) -> str:
        """
        Return text representing operations. Internal method, allows skipping of sources.

        This base version ignores all three options and renders only the column names.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        """
        return f"ViewRepresentation({self.column_names!r})"

    # noinspection PyBroadException
    def to_python(self, *, indent=0, strict=True, pretty=False, black_mode=None) -> str:
        """
        Return Python source code for operations, wrapped in parentheses.

        :param indent: extra indent.
        :param strict: if False allow eliding of columns names and other long structures.
        :param pretty: if True re-format result with black (this also forces strict).
        :param black_mode: black formatter parameters.
        """
        self.columns_used()  # for table consistency check/raise
        effective_strict = True if pretty else strict
        body = self.to_python_src_(
            indent=indent, strict=effective_strict, print_sources=True
        )
        python_str = "(\n" + body + "\n)\n"
        if not pretty:
            return python_str
        return pretty_format_python(python_str, black_mode=black_mode)

    def __repr__(self):
        """Return the strict, pretty-formatted Python source for this operator."""
        return self.to_python(pretty=True, strict=True)

    def __str__(self):
        """Return the strict, pretty-formatted Python source for this operator."""
        return self.to_python(pretty=True, strict=True)
    
    def _repr_pretty_(self, p, cycle):
        """
        IPython pretty print hook.
        https://ipython.readthedocs.io/en/stable/config/integrating.html

        :param p: printer object, its text() method receives the output
        :param cycle: if true, emit a short placeholder instead of the full source
        """
        text = (
            "ViewRepresentation()"
            if cycle
            else self.to_python(strict=True, pretty=True)
        )
        p.text(text)


    # noinspection PyMethodMayBeStatic,PyUnusedLocal
    @abc.abstractmethod
    def _equiv_nodes(self, other):
        """
        Check if immediate ops structure is equivalent, does not check child nodes.

        Abstract. __eq__() calls this only after confirming that other has the
        same type, node name, column names, and number of sources, and treats
        the result as a truth value.

        :param other: node to compare against
        """

    def __eq__(self, other):
        """
        Structural equality: same concrete type, node name, columns, node-level
        details (_equiv_nodes), and pairwise-equal sources.
        """
        if not isinstance(other, ViewRepresentation):
            return False
        if type(self) is not type(other):
            return False
        if self.node_name != other.node_name:
            return False
        if self.column_names != other.column_names:
            return False
        if len(self.sources) != len(other.sources):
            return False
        if not self._equiv_nodes(other):
            return False
        # source counts already match, so zip pairs every source
        for mine, theirs in zip(self.sources, other.sources):
            if not mine.__eq__(theirs):
                return False
        return True

    def __ne__(self, other):
        """Return the negation of __eq__()."""
        is_equal = self.__eq__(other)
        return not is_equal
    
    # composition

    def act_on(self, b, *, correct_ordered_first_call: bool = False):
        """
        Apply self to b; must associate with composition.
        Operator is strict about column names.

        If b is an operator it replaces a leaf table of self; if b is a
        ShiftPipeAction (and this is the first call) b is asked to act on
        self; otherwise b is treated as data and transformed.

        :param b: operator, ShiftPipeAction, or input data frame
        :param correct_ordered_first_call: indicate not on fallback path
        :return: transformed or composed result
        """
        assert isinstance(correct_ordered_first_call, bool)
        tables = self.get_tables()
        if isinstance(b, ViewRepresentation):
            # substitute into the only table, or else the table matching b's key
            key = list(tables)[0] if len(tables) == 1 else b.key
            assert isinstance(key, str)
            replaced = tables[key]
            # this is defending associativity of composition against table narrowing
            assert set(b.column_names) == set(replaced.column_names)
            return self.replace_leaves({key: b})
        # let a ShiftPipeAction handle the mapping (data is never a ShiftPipeAction)
        if correct_ordered_first_call and isinstance(b, ShiftPipeAction):
            return b.act_on(self, correct_ordered_first_call=False)
        # otherwise assume b is a table
        assert len(tables) == 1
        key = list(tables)[0]
        assert isinstance(key, str)
        only_table = tables[key]
        assert set(b.columns) == set(only_table.column_names)
        return self.transform(b, strict=True)

    # query generation

    @abc.abstractmethod
    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        Abstract: no implementation is given here.

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """

    def to_sql(
        self,
        db_model=None,
        *,
        sql_format_options=None,
    ) -> str:
        """
        Convert operator dag to SQL by delegating to the database model.

        :param db_model: database model or handle data_algebra.db_model.DBModel or
                         data_algebra.db_model.DBHandle; None means a new SQLiteModel
        :param sql_format_options: options for sql formatting
        :return: string representation of SQL query
        """
        model = db_model
        if model is None:
            import data_algebra.SQLite  # import late to avoid circular import issue

            model = data_algebra.SQLite.SQLiteModel()
        return model.to_sql(sql_format_options=sql_format_options, ops=self)

    # Pandas realization

    def check_constraints(self, data_map, *, strict: bool = True):
        """
        Check tables supplied meet data consistency constraints.

        :param data_map: dictionary mapping table key to the collection of
                         column names available for that table
        :param strict: if True, also reject columns the table description does not list
        :raises ValueError: if a required table or column is missing, or
                            (when strict) a table has unexpected columns
        """
        assert isinstance(strict, bool)
        self.columns_used()  # for table consistency check/raise
        tables = self.get_tables()
        missing_tables = set(tables.keys()) - set(data_map.keys())
        if len(missing_tables) > 0:
            raise ValueError("missing required tables: " + str(missing_tables))
        for k, td in tables.items():
            have = set(data_map[k])
            require = set(td.column_names)
            missing = require - have
            if len(missing) > 0:
                raise ValueError(
                    "Table " + k + " missing required columns: " + str(missing)
                )
            if strict:
                excess = have - require
                if len(excess) > 0:
                    # message previously read "excess columns columns"
                    raise ValueError("Table " + k + " excess columns: " + str(excess))

    def eval(
        self,
        data_map: Dict[str, Any],
        *,
        data_model=None,
        strict: bool = False,
    ):
        """
        Evaluate operators with respect to the supplied tables.

        If every required entry of data_map is itself an OperatorPlatform,
        the entries are substituted for the leaf tables and the composed
        operator is returned instead of data.

        :param data_map: map from table names to data frames or data sources
        :param data_model: adaptor to data dialect (Pandas for now); when None it
                           is looked up from the first data value, else the default
        :param strict: if True, throw on unexpected columns
        :return: table result
        :raises ValueError: if a data value is not appropriate for the data model
        """
        assert isinstance(data_map, dict)
        assert isinstance(strict, bool)
        self.columns_used()  # for table consistency check/raise
        tables = self.get_tables()
        if len(tables) > 0:
            saw_data_value = False
            for name in tables.keys():
                value = data_map[name]
                if isinstance(value, OperatorPlatform):
                    continue
                saw_data_value = True
                if data_model is None:
                    data_model = data_algebra.data_model.lookup_data_model_for_dataframe(value)
                if not data_model.is_appropriate_data_instance(value):
                    raise ValueError(f"data_map[{name}] type {type(value)} not appropriate for data model {data_model}")
            if not saw_data_value:
                # apply self to replacement leaf definitions
                return self.replace_leaves(data_map)
        if data_model is None:
            data_model = data_algebra.data_model.default_data_model()
        assert isinstance(data_model, data_algebra.data_model.DataModel)
        available_columns = {name: data_map[name].columns for name in tables.keys()}
        self.check_constraints(available_columns, strict=strict)
        return data_model.eval(op=self, data_map=data_map)

    # noinspection PyPep8Naming
    def transform(
        self,
        X,
        *,
        data_model=None,
        strict: bool = False,
    ):
        """
        Apply data transform to a single table.

        :param X: table to apply to
        :param data_model: data model for Pandas execution
        :param strict: if True, throw on unexpected columns
        :return: transformed data frame
        :raises ValueError: if the operator dag does not have exactly one table
        """
        assert isinstance(strict, bool)
        tables = self.get_tables()
        if len(tables) != 1:
            raise ValueError(
                "transform(DataFrame) can only be applied to ops-dags with only one table def"
            )
        (table_key,) = tables.keys()
        return self.eval(
            data_map={table_key: X},
            data_model=data_model,
            strict=strict,
        )

    # composition (used to eliminate intermediate order nodes)

    def is_trivial_when_intermediate_(self) -> bool:
        """
        Return True if this operator can be eliminated from an interior chain.

        The builder methods of this class check this flag and, when it is
        True, build on self.sources[0] instead of self. This base version
        always returns False.
        """
        return False

    # return table representation of self
    def as_table_description(self, table_name: str, *, qualifiers=None):
        """
        Return a table description with this operator's column names.

        :param table_name: table name to use.
        :param qualifiers: db qualifiers to annotate
        """
        description = TableDescription(
            qualifiers=qualifiers,
            column_names=self.column_names,
            table_name=table_name,
        )
        return description

    # implement builders for all non-initial ops types on base class
    def extend_parsed_(
        self, parsed_ops, *, partition_by=None, order_by=None, reverse=None
    ) -> "ViewRepresentation":
        """
        Add new derived columns, can replace existing columns for parsed operations. Internal method.

        :param parsed_ops: dictionary of calculations to perform.
        :param partition_by: optional window partition specification, or 1.
        :param order_by: optional window ordering specification, or 1.
        :param reverse: optional order reversal specification.
        :return: compose operator directed acyclic graph
        :raises ValueError: if the calculation would change a partition_by or
                            order_by column, if partition_by and order_by overlap,
                            or if reverse names a column not in order_by
        """
        if (parsed_ops is None) or (len(parsed_ops) < 1):
            return self
        partition_by = _work_col_group_arg(
            partition_by, arg_name="partition_by", columns=self.column_names
        )
        order_by = _work_col_group_arg(
            order_by, arg_name="order_by", columns=self.column_names
        )
        reverse = _work_col_group_arg(
            reverse, arg_name="reverse", columns=self.column_names
        )
        assert reverse != 1
        new_cols_produced_in_calc = set(parsed_ops.keys())
        # order_by may be the value 1, which names no columns; use an empty
        # column list for the set checks so they do not try to iterate an int
        order_by_columns = [] if order_by == 1 else order_by
        if (partition_by != 1) and (len(partition_by) > 0):
            if len(new_cols_produced_in_calc.intersection(partition_by)) > 0:
                raise ValueError("must not change partition_by columns")
            if len(set(partition_by).intersection(order_by_columns)) > 0:
                raise ValueError("order_by and partition_by columns must be disjoint")
        if len(new_cols_produced_in_calc.intersection(order_by_columns)) > 0:
            # message previously (incorrectly) named partition_by
            raise ValueError("must not change order_by columns")
        if len(set(reverse).difference(order_by_columns)) > 0:
            raise ValueError("all columns in reverse must be in order_by")
        if self.is_trivial_when_intermediate_():
            return self.sources[0].extend_parsed_(
                parsed_ops=parsed_ops,
                partition_by=partition_by,
                order_by=order_by,
                reverse=reverse,
            )
        # see if we can combine nodes
        if isinstance(self, ExtendNode):
            # partitions are compatible when equal, or when both are trivial (1 or empty)
            compatible_partition = (partition_by == self.partition_by) or (
                ((partition_by == 1) or (len(partition_by) <= 0))
                and ((self.partition_by == 1) or (len(self.partition_by) <= 0))
            )
            same_windowing = (
                data_algebra.expr_rep.implies_windowed(parsed_ops)
                == self.windowed_situation
            )
            if (
                compatible_partition
                and same_windowing
                and (order_by == self.order_by)
                and (reverse == self.reverse)
            ):
                new_ops = data_algebra.data_ops_utils.try_to_merge_ops(
                    self.ops, parsed_ops
                )
                if new_ops is not None:
                    return ExtendNode(
                        source=self.sources[0],
                        parsed_ops=new_ops,
                        partition_by=partition_by,
                        order_by=order_by,
                        reverse=reverse,
                    )
        # new ops
        return ExtendNode(
            source=self,
            parsed_ops=parsed_ops,
            partition_by=partition_by,
            order_by=order_by,
            reverse=reverse,
        )

    def extend(
        self, ops, *, partition_by=None, order_by=None, reverse=None
    ) -> "ViewRepresentation":
        """
        Add new derived columns, can replace existing columns.

        Parses ops in the context of this view, then defers to extend_parsed_().

        :param ops: dictionary of calculations to perform.
        :param partition_by: optional window partition specification, or 1.
        :param order_by: optional window ordering specification, or 1.
        :param reverse: optional order reversal specification.
        :return: compose operator directed acyclic graph
        """
        parsed = data_algebra.expr_parse.parse_assignments_in_context(
            view=self, ops=ops
        )
        window_spec = dict(partition_by=partition_by, order_by=order_by, reverse=reverse)
        return self.extend_parsed_(parsed_ops=parsed, **window_spec)

    def project_parsed_(
        self, parsed_ops=None, *, group_by=None
    ) -> "ViewRepresentation":
        """
        Compute projection, or grouped calculation for parsed ops. Internal method.

        :param parsed_ops: dictionary of calculations to perform, can be empty or None.
        :param group_by: optional group key(s) specification.
        :return: compose operator directed acyclic graph
        :raises ValueError: if both parsed_ops and group_by are empty, or if
                            a calculation would alter a grouping column
        """

        group_by = _work_col_group_arg(
            group_by, arg_name="group_by", columns=self.column_names
        )
        assert group_by != 1
        if parsed_ops is None:
            # None is the declared default; treat it as "no calculations" so
            # a pure group_by projection does not fail on parsed_ops.keys()
            parsed_ops = {}
        if (len(parsed_ops) < 1) and (len(group_by) < 1):
            raise ValueError("project must have ops or group_by")
        new_cols_produced_in_calc = set(parsed_ops.keys())
        if len(new_cols_produced_in_calc.intersection(group_by)) > 0:
            raise ValueError("project can not alter grouping columns")
        if self.is_trivial_when_intermediate_():
            return self.sources[0].project_parsed_(parsed_ops, group_by=group_by)
        return ProjectNode(source=self, parsed_ops=parsed_ops, group_by=group_by)

    def project(self, ops=None, *, group_by=None) -> "ViewRepresentation":
        """
        Compute projection, or grouped calculation.

        Parses ops in the context of this view, then defers to project_parsed_().

        :param ops: dictionary of calculations to perform, can be empty.
        :param group_by: optional group key(s) specification.
        :return: compose operator directed acyclic graph
        """
        parsed = data_algebra.expr_parse.parse_assignments_in_context(
            view=self, ops=ops
        )
        return self.project_parsed_(group_by=group_by, parsed_ops=parsed)

    def natural_join(
        self,
        b,
        *,
        on=None,
        jointype: str,
        check_all_common_keys_in_equi_spec: bool = False,
        by=None,
        check_all_common_keys_in_by: bool = False,
    ) -> "ViewRepresentation":
        """
        Join self (left) results with b (right).

        :param b: second or right table to join to.
        :param on: column names to enforce equality on (list of column names, list of tuples, or dictionary)
        :param jointype: name of join type.
        :param check_all_common_keys_in_equi_spec: if True, raise if any non-equality key columns are common to tables.
        :param by: synonym for on, only set at most one of on or by (deprecated).
        :param check_all_common_keys_in_by: synonym for check_all_common_keys_in_equi_spec (deprecated).
        :return: compose operator directed acyclic graph
        """
        assert isinstance(b, ViewRepresentation)
        assert (on is None) or (by is None)
        if by is not None:
            on = by
        on_a, on_b = _convert_on_clause_to_parallel_lists(on)
        assert isinstance(jointype, str)
        assert isinstance(check_all_common_keys_in_equi_spec, bool)
        assert isinstance(check_all_common_keys_in_by, bool)
        check_all_common_keys_in_equi_spec = (
            check_all_common_keys_in_equi_spec or check_all_common_keys_in_by
        )
        if self.is_trivial_when_intermediate_():
            # Delegate with the already-normalized join columns (the original
            # "on" may be a one-shot iterator consumed above) and keep the
            # common-key check flag, which was previously dropped here.
            return self.sources[0].natural_join(
                b,
                on=_convert_parallel_lists_to_on_clause(on_a, on_b),
                jointype=jointype,
                check_all_common_keys_in_equi_spec=check_all_common_keys_in_equi_spec,
            )
        return NaturalJoinNode(
            a=self,
            b=b,
            on_a=on_a,
            on_b=on_b,
            jointype=jointype,
            check_all_common_keys_in_equi_spec=check_all_common_keys_in_equi_spec,
        )

    def concat_rows(
        self, b, *, id_column="source_name", a_name="a", b_name="b"
    ) -> "ViewRepresentation":
        """
        Union or concatenate rows of self with rows of b.

        :param b: table with rows to add, None leaves self unchanged.
        :param id_column: optional name for new source identification column.
        :param a_name: source annotation to use for self/a.
        :param b_name: source annotation to use for b.
        :return: compose operator directed acyclic graph
        """
        if b is None:
            return self
        assert isinstance(b, ViewRepresentation)
        assert isinstance(id_column, (str, type(None)))
        assert isinstance(a_name, str)
        assert isinstance(b_name, str)
        labels = dict(id_column=id_column, a_name=a_name, b_name=b_name)
        if self.is_trivial_when_intermediate_():
            return self.sources[0].concat_rows(b, **labels)
        return ConcatRowsNode(a=self, b=b, **labels)

    def select_rows_parsed_(self, parsed_expr) -> "ViewRepresentation":
        """
        Select rows matching parsed expr criteria. Internal method.

        :param parsed_expr: logical expression specifying desired rows, None leaves self unchanged.
        :return: compose operator directed acyclic graph
        """
        if parsed_expr is None:
            return self
        if not self.is_trivial_when_intermediate_():
            return SelectRowsNode(source=self, ops=parsed_expr)
        return self.sources[0].select_rows_parsed_(parsed_expr=parsed_expr)

    def select_rows(self, expr) -> "ViewRepresentation":
        """
        Select rows matching expr criteria.

        A list or tuple of string expressions is combined with "and".

        :param expr: logical expression specifying desired rows.
        :return: compose operator directed acyclic graph
        """
        if expr is None:
            return self
        if isinstance(expr, (list, tuple)):
            # several conditions: join them into a single "and" expression
            assert all([isinstance(vi, str) for vi in expr])
            if len(expr) < 1:
                return self
            if len(expr) == 1:
                expr = expr[0]
            else:
                expr = " and ".join(["(" + clause + ")" for clause in expr])
        assert isinstance(expr, (str, data_algebra.expr_rep.PreTerm))
        if self.is_trivial_when_intermediate_():
            return self.sources[0].select_rows(expr)
        parsed = data_algebra.expr_parse.parse_assignments_in_context(
            ops={"expr": expr}, view=self
        )

        def visit(node):
            """recursively walk the arguments of Expression nodes"""
            if isinstance(node, data_algebra.expr_rep.Expression):
                for child in node.args:
                    visit(child)

        for term in parsed.values():
            visit(term)
        return self.select_rows_parsed_(parsed_expr=parsed)

    def drop_columns(self, column_deletions) -> "ViewRepresentation":
        """
        Remove columns from result.

        :param column_deletions: column name or list of columns to remove;
                                 None or empty leaves self unchanged.
        :return: compose operator directed acyclic graph
        """
        # test for None before list(): list(None) raises TypeError, which
        # made the original None check unreachable
        if column_deletions is None:
            return self
        if isinstance(column_deletions, str):
            column_deletions = [column_deletions]
        else:
            column_deletions = list(column_deletions)
        if len(column_deletions) < 1:
            return self
        if self.is_trivial_when_intermediate_():
            return self.sources[0].drop_columns(column_deletions)
        return DropColumnsNode(source=self, column_deletions=column_deletions)

    def select_columns(self, columns) -> "ViewRepresentation":
        """
        Narrow to columns in result.

        Selecting exactly the current columns, in order, returns self.
        A selection applied on top of a select/drop-columns node is applied
        to that node's source instead.

        :param columns: column name or list of columns to keep.
        :return: compose operator directed acyclic graph
        :raises ValueError: if no column is selected
        """
        if isinstance(columns, str):
            columns = [columns]
        if (columns is None) or (len(columns) < 1):
            raise ValueError("must select at least one column")
        # column_names is stored as a tuple, so a list argument never compared
        # equal to it; normalize both sides before comparing
        if tuple(columns) == tuple(self.column_names):
            return self
        if self.is_trivial_when_intermediate_():
            return self.sources[0].select_columns(columns)
        if isinstance(self, SelectColumnsNode):
            return self.sources[0].select_columns(columns)
        if isinstance(self, DropColumnsNode):
            return self.sources[0].select_columns(columns)
        return SelectColumnsNode(source=self, columns=columns)

    def map_columns(self, column_remapping) -> "ViewRepresentation":
        """
        Map column names or rename.

        :param column_remapping: dictionary mapping old column sources to new column names (same
                                 direction as Pandas rename); None or empty leaves self unchanged.
        :return: compose operator directed acyclic graph
        """
        if column_remapping is None or len(column_remapping) < 1:
            return self
        assert isinstance(column_remapping, dict)
        if not self.is_trivial_when_intermediate_():
            return MapColumnsNode(source=self, column_remapping=column_remapping)
        return self.sources[0].map_columns(column_remapping)

    def rename_columns(self, column_remapping) -> "ViewRepresentation":
        """
        Rename columns.

        :param column_remapping: dictionary mapping new column names to old column sources (same
                                 direction as extend); None or empty leaves self unchanged.
        :return: compose operator directed acyclic graph
        """
        if column_remapping is None or len(column_remapping) < 1:
            return self
        assert isinstance(column_remapping, dict)
        if not self.is_trivial_when_intermediate_():
            return RenameColumnsNode(source=self, column_remapping=column_remapping)
        return self.sources[0].rename_columns(column_remapping)

    def order_rows(self, columns, *, reverse=None, limit=None) -> "ViewRepresentation":
        """
        Order rows by column set.

        With no ordering columns and no limit, self is returned unchanged.

        :param columns: column name or columns to order by.
        :param reverse: optional column name or columns to reverse order.
        :param limit: optional row limit to impose on result.
        :return: compose operator directed acyclic graph
        """
        if isinstance(columns, str):
            columns = [columns]
        if isinstance(reverse, str):
            reverse = [reverse]
        no_columns = (columns is None) or (len(columns) < 1)
        if no_columns and (limit is None):
            return self
        if not self.is_trivial_when_intermediate_():
            return OrderRowsNode(
                source=self, columns=columns, reverse=reverse, limit=limit
            )
        return self.sources[0].order_rows(columns, reverse=reverse, limit=limit)

    def convert_records(self, record_map) -> "ViewRepresentation":
        """
        Apply a record mapping taking blocks_in to blocks_out structures.

        :param record_map: data_algebra.cdata.RecordMap transform specification,
                           None leaves self unchanged.
        :return: compose operator directed acyclic graph
        """
        if record_map is None:
            return self
        if not self.is_trivial_when_intermediate_():
            return ConvertRecordsNode(source=self, record_map=record_map)
        return self.sources[0].convert_records(record_map)


class TableDescription(ViewRepresentation):
    """
    Describe columns, and qualifiers, of a table.

    Example:
        from data_algebra.data_ops import *
        d = TableDescription(table_name='d', column_names=['x', 'y'])
        print(d)
    """

    table_name: str
    table_name_was_set_by_user: bool
    qualifiers: Dict[str, str]

    def __init__(
        self,
        *,
        table_name: Optional[str] = None,
        column_names: Iterable[str],
        qualifiers=None,
        sql_meta=None,
        head=None,
        limit_was: Optional[int] = None,
        nrows: Optional[int] = None,
    ):
        """
        :param table_name: name of the table; when None the placeholder "data_frame" is used
                           and table_name_was_set_by_user is recorded as False.
        :param column_names: column names; a single string is treated as one column.
        :param qualifiers: optional dictionary of table qualifiers (a copy is stored).
        :param sql_meta: optional sql meta information, stored as given.
        :param head: optional data sample; its columns must equal column_names as a set.
        :param limit_was: optional, advisory row limit associated with head.
        :param nrows: optional row count, stored as given.
        :raises ValueError: if head is given and its columns differ from column_names.
        """
        if isinstance(column_names, str):
            column_names = (column_names,)
        else:
            column_names = tuple(
                column_names
            )  # convert to tuple from other types such as series
        table_name_was_set_by_user = True
        if table_name is None:
            table_name_was_set_by_user = False
            table_name = "data_frame"
        assert isinstance(table_name, str)
        ViewRepresentation.__init__(
            self, column_names=column_names, node_name="TableDescription", key=table_name,
        )
        self.table_name_was_set_by_user = table_name_was_set_by_user
        self.table_name = table_name
        if head is not None:
            # order is not compared, only the set of names
            if set(head.columns) != set(column_names):
                raise ValueError("head.columns != column_names")
        self.head = head
        self.limit_was = limit_was
        self.sql_meta = sql_meta
        self.nrows = nrows
        self.column_names = column_names
        if qualifiers is None:
            qualifiers = {}
        assert isinstance(qualifiers, dict)
        self.qualifiers = qualifiers.copy()

    def same_table_description_(self, other):
        """
        Return true if other is a description of the same table. Internal method, ignores data.

        :param other: object to compare to
        :return: True if name, key, columns, and qualifiers all agree
        """
        if not isinstance(other, data_algebra.data_ops.TableDescription):
            return False
        if self.table_name_was_set_by_user != other.table_name_was_set_by_user:
            return False
        if self.table_name != other.table_name:
            return False
        if self.key != other.key:
            return False
        if self.column_names != other.column_names:
            return False
        if self.qualifiers != other.qualifiers:
            return False
        # ignore head and limit_was, as they are just advisory
        return True

    def merged_rep_id(self) -> str:
        """
        String key for lookups.
        """
        return "table_" + str(self.key)

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: the replacement registered for this table's key, or (when there is none) a
                 fresh TableDescription carrying only table name, column names, and qualifiers
        """
        assert isinstance(replacement_map, dict)
        try:
            return replacement_map[self.key]
        except KeyError:
            pass
        # copy self (head, sql_meta, limit_was, and nrows are not carried over)
        r = TableDescription(
            table_name=self.table_name,
            column_names=self.column_names,
            qualifiers=self.qualifiers,
        )
        return r

    def _equiv_nodes(self, other):
        """
        Return True if other is a TableDescription with the same name, columns, qualifiers, and key.
        """
        if not isinstance(other, TableDescription):
            return False
        if not self.table_name == other.table_name:
            return False
        if not self.column_names == other.column_names:
            return False
        if not self.qualifiers == other.qualifiers:
            return False
        if not self.key == other.key:
            return False
        return True

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children (not used here, a table has no children).
        :return: Python source text for this table description
        """
        spacer = " "
        if indent >= 0:
            spacer = "\n " + " " * indent
        column_limit = 20
        n_columns = len(self.column_names)
        truncated = (not strict) and (column_limit < n_columns)
        if truncated:
            # show the first column_limit names, then how many were left out
            cols_to_print = [
                self.column_names[i].__repr__() for i in range(column_limit)
            ] + ["+ " + str(n_columns - column_limit) + " more"]
        else:
            cols_to_print = [c.__repr__() for c in self.column_names]
        col_text = data_algebra.flow_text.flow_text(
            cols_to_print, align_right=70 - max(0, indent), sep_width=2
        )
        col_text = [", ".join(line) for line in col_text]
        col_text = (",  " + spacer).join(col_text)
        s = (
            "TableDescription("
            + spacer
            + "table_name="
            + self.table_name.__repr__()
            + ","
            + spacer
            + "column_names=["
            + spacer
            + "  "
            + col_text
            + "]"
        )
        if len(self.qualifiers) > 0:
            s = s + "," + spacer + "qualifiers=" + self.qualifiers.__repr__()
        s = s + ")"
        return s

    def get_tables(self):
        """get a dictionary of all tables used in an operator DAG,
        raise an exception if the values are not consistent"""
        return {self.key: self}

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: list of order sets (list parallel to sources).
        """
        return []  # no inputs to table description

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        return db_model.table_def_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )

    def __str__(self):
        """
        Return the ViewRepresentation text, followed by head (when present) as '#'-prefixed lines.
        """
        rep = ViewRepresentation.__str__(self)
        if self.head is not None:
            rep = rep + "\n#\t" + str(self.head).replace("\n", "\n#\t")
        return rep

    # comparable to other table descriptions
    def __eq__(self, other):
        """
        Table descriptions compare equal when their keys are equal.
        """
        if not isinstance(other, TableDescription):
            return False
        return self.key.__eq__(other.key)

    def __hash__(self):
        """
        Hash on key, consistent with __eq__.
        """
        return self.key.__hash__()


def describe_table(
    d,
    table_name: Optional[str] = None,
    *,
    qualifiers=None,
    sql_meta=None,
    row_limit: Optional[int] = 7,
    keep_sample=True,
    keep_all=False,
) -> TableDescription:
    """
    Build a TableDescription from a data frame.

    :param d: data table to describe
    :param table_name: name of table
    :param qualifiers: optional, table qualifiers
    :param sql_meta: optional, sql meta information map
    :param row_limit: how many rows to sample
    :param keep_sample: logical, if True retain head of table
    :param keep_all: logical, if True retain all of table
    :return: TableDescription
    """
    assert not isinstance(d, (OperatorPlatform, ViewRepresentation))
    assert isinstance(keep_sample, bool)
    assert isinstance(keep_all, bool)
    # TODO: see if we can change this to never None
    assert (table_name is None) or isinstance(table_name, str)
    # confirm our data model is loaded
    model_for_d = data_algebra.data_model.lookup_data_model_for_dataframe(d)
    assert model_for_d.is_appropriate_data_instance(d)
    column_names = list(d.columns)
    row_count = d.shape[0]
    sample = None
    if keep_sample or keep_all:
        take_everything = (
            keep_all or (row_limit is None) or (row_limit >= row_count)
        )
        if take_everything:
            # the whole frame is retained, so no limit is recorded
            row_limit = None
            sample = d
        else:
            sample = d.head(row_limit)
    return TableDescription(
        table_name=table_name,
        column_names=column_names,
        qualifiers=qualifiers,
        sql_meta=sql_meta,
        head=sample,
        limit_was=row_limit,
        nrows=row_count,
    )


def table(d, *, table_name=None):
    """
    Capture a table, values included, for later use.

    :param d: Pandas data frame to capture
    :param table_name: name for this table
    :return: a table description, with values retained
    """
    # keep_all asks describe_table to retain the whole frame, so no row limit is passed
    full_capture = describe_table(
        d,
        table_name,
        keep_all=True,
        keep_sample=True,
        row_limit=None,
        sql_meta=None,
        qualifiers=None,
    )
    return full_capture


def descr(**kwargs):
    """
    Describe one named table, keeping only a sample of its rows.

    :param kwargs: exactly one named table of the form table_name=table_value
    :return: a table description (not all values retained)
    """
    assert len(kwargs) == 1
    # exactly one entry: the keyword is the table name, the value is the data
    ((table_name, d),) = kwargs.items()
    return describe_table(
        d,
        table_name,
        keep_all=False,
        keep_sample=True,
        row_limit=7,
        sql_meta=None,
        qualifiers=None,
    )


def data(*args, **kwargs):
    """
    Capture a full table for later use. Exactly one of args/kwargs can be set.

    :param args: at most one unnamed table
    :param kwargs: at most one named table of the form table_name=table_value
    :return: a table description, with all values retained
    """
    assert (len(args) + len(kwargs)) == 1
    if len(kwargs) != 1:
        # the single argument was positional, so the table has no user supplied name
        return table(d=args[0], table_name=None)
    ((table_name, d),) = kwargs.items()
    return table(d=d, table_name=table_name)


class ExtendNode(ViewRepresentation):
    """
    Class representation of .extend() method/step.
    """

    windowed_situation: bool
    ordered_windowed_situation: bool
    partition_by: List[str]

    def __init__(
        self, *, source, parsed_ops, partition_by=None, order_by=None, reverse=None
    ):
        """
        :param source: node this step is applied to
        :param parsed_ops: dictionary mapping result column names to parsed expressions
        :param partition_by: optional partition column name(s); a number (such as 1) marks the
                             step as windowed with no partition columns
        :param order_by: optional column name(s) to order by (marks the step as an ordered
                         windowed situation)
        :param reverse: optional column name(s), a subset of order_by, whose order is reversed
        :raises KeyError: if an expression refers to a column not in source
        :raises ValueError: on duplicate or unknown partition_by/order_by/reverse names, on an
                            attempt to assign to one of those columns, or on an expression
                            that is not allowed in the (windowed) situation
        """
        windowed_situation = data_algebra.expr_rep.implies_windowed(parsed_ops)
        ordered_windowed_situation = False
        self.ops = parsed_ops
        if partition_by is None:
            partition_by = []
        if isinstance(partition_by, numbers.Number):
            # numeric partition_by: windowed, but with no partition columns
            partition_by = []
            windowed_situation = True
        if isinstance(partition_by, str):
            partition_by = [partition_by]
        if len(partition_by) > 0:
            windowed_situation = True
        self.partition_by = partition_by
        if order_by is None:
            order_by = []
        if isinstance(order_by, str):
            order_by = [order_by]
        if len(order_by) > 0:
            windowed_situation = True
            ordered_windowed_situation = True
        self.windowed_situation = windowed_situation
        self.order_by = order_by
        if reverse is None:
            reverse = []
        if isinstance(reverse, str):
            reverse = [reverse]
        self.reverse = reverse
        column_names = list(source.column_names)
        consumed_cols = set()
        for (k, o) in parsed_ops.items():
            o.get_column_names(consumed_cols)
        unknown_cols = consumed_cols - set(source.column_names)
        if len(unknown_cols) > 0:
            raise KeyError("referred to unknown columns: " + str(unknown_cols))
        # known_cols is the source's columns; new result columns are appended after them
        known_cols = set(column_names)
        for ci in parsed_ops.keys():
            if ci not in known_cols:
                column_names.append(ci)
        if len(partition_by) != len(set(partition_by)):
            raise ValueError("Duplicate name(s) in partition_by")
        if len(order_by) != len(set(order_by)):
            raise ValueError("Duplicate name(s) in order_by")
        if len(reverse) != len(set(reverse)):
            raise ValueError("Duplicate name(s) in reverse")
        unknown = set(partition_by) - known_cols
        if len(unknown) > 0:
            raise ValueError("unknown partition_by columns: " + str(unknown))
        unknown = set(order_by) - known_cols
        if len(unknown) > 0:
            raise ValueError("unknown order_by columns: " + str(unknown))
        unknown = set(reverse) - set(order_by)
        if len(unknown) > 0:
            raise ValueError("reverse columns not in order_by: " + str(unknown))
        # partitioning/ordering columns may not be assigned to in the same step
        bad_overwrite = set(parsed_ops.keys()).intersection(
            set(partition_by).union(order_by, reverse)
        )
        if len(bad_overwrite) > 0:
            raise ValueError("tried to change: " + str(bad_overwrite))
        # check op arguments are very simple: all arguments are column names
        if windowed_situation:
            source_col_set = set(source.column_names)
            for (k, opk) in parsed_ops.items():
                if not isinstance(opk, data_algebra.expr_rep.Expression):
                    raise ValueError(
                        "non-aggregated expression in windowed/partitioned extend: "
                        + "'"
                        + k
                        + "': '"
                        + str(opk)
                        + "'"
                    )
                if len(opk.args) > 1:
                    # every argument after the first must be a constant value
                    for i in range(1, len(opk.args)):
                        if not isinstance(opk.args[i], data_algebra.expr_rep.Value):
                            raise ValueError(
                                "in windowed situations only simple operators are allowed, "
                                + "'"
                                + k
                                + "': '"
                                + str(opk)
                                + "' term is too complex an expression"
                            )
                if len(opk.args) > 0:
                    # the first argument must be a source column or a constant value
                    if isinstance(opk.args[0], data_algebra.expr_rep.ColumnReference):
                        value_name = opk.args[0].column_name
                        if value_name not in source_col_set:
                            raise ValueError(value_name + " not in source column set")
                    else:
                        if not isinstance(opk.args[0], data_algebra.expr_rep.Value):
                            raise ValueError(
                                "in windowed situations only simple operators are allowed, "
                                + "'"
                                + k
                                + "': '"
                                + str(opk)
                                + "' term is too complex an expression"
                            )
                if (
                    opk.op
                    in data_algebra.expr_rep.fn_names_that_contradict_windowed_situation
                ):
                    raise ValueError(
                        "'" + str(opk) + "' is not allowed in a windowed situation"
                    )
                if ordered_windowed_situation and (
                    opk.op
                    in data_algebra.expr_rep.fn_names_that_contradict_ordered_windowed_situation
                ):
                    raise ValueError(
                        "'"
                        + str(opk)
                        + "' is not allowed in an ordered windowed situation"
                    )
                if (not ordered_windowed_situation) and (
                    opk.op
                    in data_algebra.expr_rep.fn_names_that_imply_ordered_windowed_situation
                ):
                    raise ValueError(
                        "'"
                        + str(opk)
                        + "' is not allowed in not-ordered windowed situation"
                    )
        self.ordered_windowed_situation = ordered_windowed_situation
        ViewRepresentation.__init__(
            self, column_names=column_names, sources=[source], node_name="ExtendNode"
        )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        new_sources = [
            s.replace_leaves(replacement_map) for s in self.sources
        ]
        return new_sources[0].extend_parsed_(
            parsed_ops=self.ops,
            partition_by=self.partition_by,
            order_by=self.order_by,
            reverse=self.reverse,
        )

    def _equiv_nodes(self, other):
        """
        Return True if other is an ExtendNode with the same windowing controls and equal
        expressions under the same result names.
        """
        if not isinstance(other, ExtendNode):
            return False
        if not self.windowed_situation == other.windowed_situation:
            return False
        if not self.partition_by == other.partition_by:
            return False
        if not self.order_by == other.order_by:
            return False
        if not self.reverse == other.reverse:
            return False
        if set(self.ops.keys()) != set(other.ops.keys()):
            return False
        for k in self.ops.keys():
            if not self.ops[k].is_equal(other.ops[k]):
                return False
        return True

    def get_method_uses_(self, methods_seen: Set[MethodUse]) -> None:
        """
        Implementation of get methods_used(), internal method.

        :param methods_seen: set to collect results in.
        :return: None
        """
        for s in self.sources:
            s.get_method_uses_(methods_seen)
        method_names_seen: Set[str] = set()
        for opk in self.ops.values():
            opk.get_method_names(method_names_seen)
        for k in method_names_seen:
            methods_seen.add(
                MethodUse(
                    k,
                    is_project=False,
                    is_windowed=self.windowed_situation,
                    is_ordered=self.ordered_windowed_situation,
                )
            )

    def check_extend_window_fns_(self):
        """
        Confirm extend functions are all compatible with windowing in Pandas. Internal function.

        :raises ValueError: if, with partition_by or order_by set, an expression has a
                            non-value argument after its first, or a first argument that is
                            neither a column reference nor a value
        """
        window_situation = (len(self.partition_by) > 0) or (len(self.order_by) > 0)
        if window_situation:
            # check these are forms we are prepared to work with
            for (k, opk) in self.ops.items():
                if len(opk.args) > 0:
                    # arguments after the first must all be constant values
                    for i in range(1, len(opk.args)):
                        if not isinstance(opk.args[i], data_algebra.expr_rep.Value):
                            raise ValueError(
                                "window function with more than one non-value argument"
                            )
                    # only the first argument still needs its own check
                    if not (
                        isinstance(opk.args[0], data_algebra.expr_rep.ColumnReference)
                        or isinstance(opk.args[0], data_algebra.expr_rep.Value)
                    ):
                        raise ValueError(
                            "window expression argument must be a column or value: "
                            + str(k)
                            + ": "
                            + str(opk)
                        )

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: list of order sets (list parallel to sources).
        """
        if using is None:
            return [OrderedSet(self.sources[0].column_names)]
        subops = {k: op for (k, op) in self.ops.items() if k in using}
        if len(subops) <= 0:
            return [OrderedSet(self.sources[0].column_names)]
        # requested columns plus windowing controls, less the columns this step produces,
        # plus whatever the retained expressions read
        columns_we_take = using.union(self.partition_by, self.order_by, self.reverse)
        columns_we_take = columns_we_take - subops.keys()
        for (k, o) in subops.items():
            o.get_column_names(columns_we_take)
        return [
            OrderedSet(
                [v for v in self.sources[0].column_names if v in columns_we_take]
            )
        ]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step
        """
        spacer = " "
        if indent >= 0:
            spacer = "\n   " + " " * indent
        s = ""
        if print_sources:
            s = self.sources[0].to_python_src_(indent=indent, strict=strict) + spacer
        ops = [
            k.__repr__() + ": " + opi.to_python().__repr__()
            for (k, opi) in self.ops.items()
        ]
        flowed = ("," + spacer + " ").join(ops)
        s = s + (".extend({" + spacer + " " + flowed + "}")
        if self.windowed_situation:
            if len(self.partition_by) > 0:
                s = s + "," + spacer + "partition_by=" + self.partition_by.__repr__()
            else:
                # windowed without partition columns is written as partition_by=1
                s = s + "," + spacer + "partition_by=1"
        if len(self.order_by) > 0:
            s = s + "," + spacer + "order_by=" + self.order_by.__repr__()
        if len(self.reverse) > 0:
            s = s + "," + spacer + "reverse=" + self.reverse.__repr__()
        s = s + ")"
        return s

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        return db_model.extend_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )


class ProjectNode(ViewRepresentation):
    """
    Class representation of .project() method/step.
    """

    # TODO: should project to take an optional order for last() style calculations?
    def __init__(self, *, source, parsed_ops, group_by=None):
        """
        :param source: node this step is applied to
        :param parsed_ops: dictionary mapping result column names to parsed aggregation
                           expressions
        :param group_by: optional grouping column name(s)
        :raises KeyError: if group_by or an expression refers to a column not in source
        :raises ValueError: on duplicate group_by names, or on an expression that is not an
                            allowed simple aggregation
        """
        self.ops = parsed_ops
        if group_by is None:
            group_by = []
        if isinstance(group_by, str):
            group_by = [group_by]
        self.group_by = group_by
        # result columns: grouping columns first, then new aggregation results
        column_names = group_by.copy()
        consumed_cols = set(group_by)
        for (k, o) in parsed_ops.items():
            o.get_column_names(consumed_cols)
        # this also rejects group_by columns that are not in the source
        unknown_cols = consumed_cols - set(source.column_names)
        if len(unknown_cols) > 0:
            raise KeyError("referred to unknown columns: " + str(unknown_cols))
        known_cols = set(column_names)
        for ci in parsed_ops.keys():
            if ci not in known_cols:
                column_names.append(ci)
        if len(group_by) != len(set(group_by)):
            raise ValueError("Duplicate name in group_by")
        ViewRepresentation.__init__(
            self, column_names=column_names, sources=[source], node_name="ProjectNode"
        )
        for (k, opk) in self.ops.items():
            if not isinstance(opk, data_algebra.expr_rep.Expression):
                raise ValueError(
                    "non-aggregated expression in project: " + str(k) + ": " + str(opk)
                )
            if len(opk.args) > 1:
                raise ValueError(
                    "non-trivial aggregation expression: "
                    + str(k)
                    + ": "
                    + str(opk)
                )
            if len(opk.args) > 0:
                # the single argument must be a column reference or a constant value
                if not (
                    isinstance(opk.args[0], data_algebra.expr_rep.ColumnReference)
                    or isinstance(opk.args[0], data_algebra.expr_rep.Value)
                ):
                    raise ValueError(
                        "windows expression argument must be a column or value: "
                        + str(k)
                        + ": "
                        + str(opk)
                    )
            if (
                opk.op
                in data_algebra.expr_rep.fn_names_that_imply_ordered_windowed_situation
            ):
                raise ValueError("'" + str(opk) + "' is not allowed in project")
            if opk.op in data_algebra.expr_rep.fn_names_not_allowed_in_project:
                raise ValueError("'" + str(opk) + "' is not allowed in project")
            # TODO: check op is in list of aggregators
            # Note: non-aggregators making through will be caught by table shape check

    def get_method_uses_(self, methods_seen: Set[MethodUse]) -> None:
        """
        Implementation of get methods_used(), internal method.

        :param methods_seen: set to collect results in.
        :return: None
        """
        for s in self.sources:
            s.get_method_uses_(methods_seen)
        method_names_seen: Set[str] = set()
        for opk in self.ops.values():
            opk.get_method_names(method_names_seen)
        for k in method_names_seen:
            methods_seen.add(
                MethodUse(k, is_project=True, is_windowed=False, is_ordered=False)
            )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        new_sources = [
            s.replace_leaves(replacement_map) for s in self.sources
        ]
        return new_sources[0].project_parsed_(
            parsed_ops=self.ops, group_by=self.group_by
        )

    def _equiv_nodes(self, other):
        """
        Return True if other is a ProjectNode with the same group_by and equal expressions
        under the same result names.
        """
        if not isinstance(other, ProjectNode):
            return False
        if not self.group_by == other.group_by:
            return False
        if set(self.ops.keys()) != set(other.ops.keys()):
            return False
        for k in self.ops.keys():
            if not self.ops[k].is_equal(other.ops[k]):
                return False
        return True

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: one-element list (parallel to sources) holding the set of group_by columns
                 plus the columns read by the retained expressions.
        """
        if using is None:
            subops = self.ops
        else:
            subops = {k: op for (k, op) in self.ops.items() if k in using}
        columns_we_take = set(self.group_by)
        for (k, o) in subops.items():
            o.get_column_names(columns_we_take)
        return [columns_we_take]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step
        """
        spacer = " "
        if indent >= 0:
            spacer = "\n   " + " " * indent
        s = ""
        if print_sources:
            s = (
                self.sources[0].to_python_src_(indent=indent, strict=strict)
                + "\n"
                + " " * (max(indent, 0) + 3)
            )
        s = s + (
            ".project({"
            + spacer
            + " "
            + ("," + spacer + " ").join(
                [
                    k.__repr__() + ": " + opi.to_python().__repr__()
                    for (k, opi) in self.ops.items()
                ]
            )
            + "}"
        )
        if len(self.group_by) > 0:
            s = s + "," + spacer + "group_by=" + self.group_by.__repr__()
        s = s + ")"
        return s

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        return db_model.project_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )


class SelectRowsNode(ViewRepresentation):
    """
    Class representation of .select_rows() method/step.
    """

    expr: data_algebra.expr_rep.Expression
    decision_columns: Set[str]

    def __init__(self, source, ops):
        """
        :param source: node whose rows are filtered
        :param ops: dictionary with exactly one entry, keyed "expr", holding the parsed
                    row-selection expression
        """
        op_count = len(ops)
        if op_count < 1:
            raise ValueError("no ops")
        if op_count > 1:
            raise ValueError("too many ops")
        self.ops = ops
        self.expr = ops["expr"]
        # columns the selection expression reads
        columns_read = set()
        self.decision_columns = columns_read
        self.expr.get_column_names(columns_read)
        ViewRepresentation.__init__(
            self,
            node_name="SelectRowsNode",
            sources=[source],
            column_names=source.column_names,
        )

    def get_method_uses_(self, methods_seen: Set[MethodUse]) -> None:
        """
        Implementation of get methods_used(), internal method.

        :param methods_seen: set to collect results in.
        :return: None
        """
        for src in self.sources:
            src.get_method_uses_(methods_seen)
        names_in_expr: Set[str] = set()
        self.expr.get_method_names(names_in_expr)
        for name in names_in_expr:
            use = MethodUse(name, is_project=False, is_windowed=False, is_ordered=False)
            methods_seen.add(use)

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        rebuilt = [src.replace_leaves(replacement_map) for src in self.sources]
        first = rebuilt[0]
        return first.select_rows_parsed_(parsed_ops=self.ops)

    def _equiv_nodes(self, other):
        """
        Return True if other is a SelectRowsNode with an equal expression and as many ops.
        """
        if not isinstance(other, SelectRowsNode):
            return False
        same_expr = self.expr.is_equal(other.expr)
        if not same_expr:
            return False
        return len(self.ops) == len(other.ops)

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: list of order sets (list parallel to sources).
        """
        source_columns = OrderedSet(self.sources[0].column_names)
        if using is None:
            return [source_columns]
        # requested columns, plus whatever the selection expression itself reads
        requested = ordered_intersect(source_columns, using)
        return [ordered_union(requested, self.decision_columns)]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        """
        prefix = ""
        if print_sources:
            source_text = self.sources[0].to_python_src_(indent=indent, strict=strict)
            padding = " " * (max(indent, 0) + 3)
            prefix = source_text + "\n" + padding
        step_text = ".select_rows(" + self.expr.to_python().__repr__() + ")"
        return prefix + step_text

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        near_sql = db_model.select_rows_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )
        return near_sql


class SelectColumnsNode(ViewRepresentation):
    """
    Class representation of .select_columns() method/step.
    """

    column_selection: List[str]

    def __init__(self, source, columns):
        """
        Build a column-selection step.

        :param source: upstream node; must expose column_names.
        :param columns: column name, or iterable of column names, to keep.
        :raises ValueError: if no columns are selected.
        :raises KeyError: if a selected column is not in source.column_names.
        """
        # a bare string is treated as a single column name
        wanted = [columns] if isinstance(columns, str) else list(columns)
        self.column_selection = wanted
        if not wanted:
            raise ValueError("can not drop all columns")
        unknown = set(wanted).difference(source.column_names)
        if unknown:
            raise KeyError("selecting unknown columns " + str(unknown))
        # stacked selections collapse: read directly from the earlier selection's source
        parent = source.sources[0] if isinstance(source, SelectColumnsNode) else source
        ViewRepresentation.__init__(
            self,
            column_names=wanted,
            sources=[parent],
            node_name="SelectColumnsNode",
        )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        rebuilt = [src.replace_leaves(replacement_map) for src in self.sources]
        return rebuilt[0].select_columns(columns=self.column_selection)

    def _equiv_nodes(self, other):
        """Return True when other is a SelectColumnsNode with an equal column_selection."""
        if isinstance(other, SelectColumnsNode):
            if self.column_selection == other.column_selection:
                return True
        return False

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: one-element list (parallel to sources) holding a set of column names.
        """
        selected = set(self.column_selection)
        if using is not None:
            selected = selected.intersection(using)
        return [selected]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step (and optionally its source).
        """
        prefix = ""
        if print_sources:
            upstream = self.sources[0].to_python_src_(indent=indent, strict=strict)
            padding = " " * (max(indent, 0) + 3)
            prefix = upstream + "\n" + padding
        return prefix + ".select_columns(" + repr(self.column_selection) + ")"

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        The work is delegated to db_model.select_columns_to_near_sql().

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        translate = db_model.select_columns_to_near_sql
        near_sql = translate(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )
        return near_sql


class DropColumnsNode(ViewRepresentation):
    """
    Class representation of .drop_columns() method/step.
    """

    column_deletions: List[str]

    def __init__(self, source, column_deletions):
        """
        Build a column-dropping step.

        :param source: upstream node; must expose column_names.
        :param column_deletions: column name, or iterable of column names, to remove.
        :raises KeyError: if a column to drop is not in source.column_names.
        :raises ValueError: if dropping would leave no columns.
        """
        # a bare string is treated as a single column name
        if isinstance(column_deletions, str):
            to_drop = [column_deletions]
        else:
            to_drop = list(column_deletions)
        self.column_deletions = to_drop
        unknown = set(to_drop).difference(source.column_names)
        if unknown:
            raise KeyError("dropping unknown columns " + str(unknown))
        # survivors keep the source's column order
        survivors = [name for name in source.column_names if name not in to_drop]
        if not survivors:
            raise ValueError("can not drop all columns")
        ViewRepresentation.__init__(
            self,
            column_names=survivors,
            sources=[source],
            node_name="DropColumnsNode",
        )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        rebuilt = [src.replace_leaves(replacement_map) for src in self.sources]
        return rebuilt[0].drop_columns(column_deletions=self.column_deletions)

    def _equiv_nodes(self, other):
        """Return True when other is a DropColumnsNode with an equal column_deletions list."""
        if isinstance(other, DropColumnsNode):
            if self.column_deletions == other.column_deletions:
                return True
        return False

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction; None means all source columns.
        :return: one-element list (parallel to sources) holding a set of column names.
        """
        candidates = set(self.sources[0].column_names) if using is None else using
        return [{name for name in candidates if name not in self.column_deletions}]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step (and optionally its source).
        """
        prefix = ""
        if print_sources:
            upstream = self.sources[0].to_python_src_(indent=indent, strict=strict)
            padding = " " * (max(indent, 0) + 3)
            prefix = upstream + "\n" + padding
        return prefix + ".drop_columns(" + repr(self.column_deletions) + ")"

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        The work is delegated to db_model.drop_columns_to_near_sql().

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        translate = db_model.drop_columns_to_near_sql
        near_sql = translate(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )
        return near_sql


class OrderRowsNode(ViewRepresentation):
    """
    Class representation of .order_rows() method/step.
    """

    order_columns: List[str]
    reverse: List[str]

    def __init__(self, source, columns, *, reverse=None, limit=None):
        """
        Build a row-ordering step.

        :param source: upstream node; must expose column_names.
        :param columns: column name, or iterable of column names, to order by.
        :param reverse: optional column name, or iterable of column names, to order in reverse;
                        each must also appear in columns.
        :param limit: optional row limit, stored as given.
        :raises ValueError: if an ordering column is not in the source, or a reverse column
                            is not among the ordering columns.
        """
        if isinstance(columns, str):
            columns = [columns]
        self.order_columns = list(columns)
        if reverse is None:
            reverse = []
        if isinstance(reverse, str):
            reverse = [reverse]
        self.reverse = list(reverse)
        self.limit = limit
        have = source.column_names
        unknown = set(self.order_columns) - set(have)
        if len(unknown) > 0:
            raise ValueError("missing required columns: " + str(unknown))
        not_order = set(self.reverse) - set(self.order_columns)
        if len(not_order) > 0:
            # report the offending reverse columns (unknown is necessarily empty here)
            raise ValueError(
                "columns declared reverse, but not order: " + str(not_order)
            )
        ViewRepresentation.__init__(
            self,
            column_names=source.column_names,
            sources=[source],
            node_name="OrderRowsNode",
        )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        new_sources = [
            s.replace_leaves(replacement_map) for s in self.sources
        ]
        return new_sources[0].order_rows(
            columns=self.order_columns, reverse=self.reverse, limit=self.limit
        )

    def _equiv_nodes(self, other):
        """Return True when other is an OrderRowsNode with equal order_columns, reverse and limit."""
        if not isinstance(other, OrderRowsNode):
            return False
        if not self.order_columns == other.order_columns:
            return False
        if not self.reverse == other.reverse:
            return False
        if not self.limit == other.limit:
            return False
        return True

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: one-element list (parallel to sources) holding a set of column names.
        """
        cols = set(self.column_names)
        if using is None:
            return [cols]
        # ordering columns are always needed, even when not requested downstream
        cols = cols.intersection(using).union(self.order_columns)
        return [cols]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step (and optionally its source).
        """
        s = ""
        if print_sources:
            s = (
                self.sources[0].to_python_src_(indent=indent, strict=strict)
                + "\n"
                + " " * (max(indent, 0) + 3)
            )
        s = s + (".order_rows(" + self.order_columns.__repr__())
        # optional arguments are only rendered when they differ from the defaults
        if len(self.reverse) > 0:
            s = s + ", reverse=" + self.reverse.__repr__()
        if self.limit is not None:
            s = s + ", limit=" + self.limit.__repr__()
        s = s + ")"
        return s

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        The work is delegated to db_model.order_to_near_sql().

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        return db_model.order_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )

    # short-cut main interface

    def is_trivial_when_intermediate_(self) -> bool:
        """
        Return True if operator can be eliminated from interior of chain.

        This is the case exactly when no limit is set.
        """
        return self.limit is None


class MapColumnsNode(ViewRepresentation):
    """
    Class representation of .map_columns() method/step.
    """

    column_remapping: Dict[str, str]
    column_deletions: List[str]

    def __init__(self, source, column_remapping):
        """
        Build a column rename/delete step.

        :param source: upstream node; must expose column_names.
        :param column_remapping: dict mapping existing column name to new column name,
                                 or to None to delete that column.
        :raises ValueError: if a key is not a source column, or a new name collides with
                            a source column that is not itself being remapped.
        """
        # split the request into renames (non-None values) and deletions (None values)
        self.column_remapping = {k: v for (k, v) in column_remapping.items() if v is not None}
        self.column_deletions = [k for (k, v) in column_remapping.items() if v is None]
        new_cols = [v for (k, v) in column_remapping.items() if v is not None]
        orig_cols = [k for k in column_remapping.keys()]
        unknown = set(orig_cols) - set(source.column_names)
        if len(unknown) > 0:
            raise ValueError("Tried to rename unknown columns: " + str(unknown))
        # a new name may reuse an existing name only if that existing column is
        # itself a key of the mapping (renamed away or deleted)
        collisions = (
            set(source.column_names) - set(new_cols).intersection(orig_cols)
        ).intersection(new_cols)
        if len(collisions) > 0:
            raise ValueError(
                "Mapping "
                + str(self.column_remapping)
                + " collides with existing columns "
                + str(collisions)
            )
        deleted = set(self.column_deletions)  # built once, not per source column
        column_names = [
            self.column_remapping.get(k, k)
            for k in source.column_names
            if k not in deleted
        ]
        self.new_columns = set(new_cols) - set(orig_cols)
        ViewRepresentation.__init__(
            self,
            column_names=column_names,
            sources=[source],
            node_name="MapColumnsNode",
        )

    def _remapping_with_deletions(self) -> Dict[str, Optional[str]]:
        """Return the renames plus a None entry for every deleted column."""
        full_mapping: Dict[str, Optional[str]] = dict(self.column_remapping)
        full_mapping.update({k: None for k in self.column_deletions})
        return full_mapping

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        new_sources = [
            s.replace_leaves(replacement_map) for s in self.sources
        ]
        # self.column_remapping holds only the renames; the deletions must be
        # passed along too or the rebuilt node would keep the deleted columns
        return new_sources[0].map_columns(
            column_remapping=self._remapping_with_deletions()
        )

    def _equiv_nodes(self, other):
        """Return True when other is a MapColumnsNode with equal renames and deletions."""
        if not isinstance(other, MapColumnsNode):
            return False
        if not (self.column_remapping == other.column_remapping):
            return False
        if not (self.column_deletions == other.column_deletions):
            return False
        return True

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction (in terms of this node's output names).
        :return: list of order sets (list parallel to sources), in terms of source names.
        """
        if using is None:
            using_tuple = self.column_names
        else:
            using_tuple = tuple(using)
        # translate output names back to the source names they came from
        reverse_mapping = {v: k for k, v in self.column_remapping.items()}
        cols = [
            reverse_mapping.get(k, k) for k in using_tuple
        ] + self.column_deletions
        return [OrderedSet(cols)]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step (and optionally its source).
        """
        s = ""
        if print_sources:
            s = (
                self.sources[0].to_python_src_(indent=indent, strict=strict)
                + "\n"
                + " " * (max(indent, 0) + 3)
            )
        column_remapping = self._remapping_with_deletions()
        s = s + (".map_columns(" + column_remapping.__repr__() + ")")
        return s

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        The work is delegated to db_model.map_columns_to_near_sql().

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        return db_model.map_columns_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )


class RenameColumnsNode(ViewRepresentation):
    """
    Class representation of .rename_columns() method/step.
    """

    column_remapping: Dict[str, str]
    reverse_mapping: Dict[str, str]

    def __init__(self, source, column_remapping):
        """
        Build a column-renaming step.

        :param source: upstream node; must expose column_names.
        :param column_remapping: dict mapping new column name to existing column name.
        :raises ValueError: if an existing name is not a source column, or a new name
                            collides with a source column that is not itself renamed.
        """
        self.column_remapping = column_remapping.copy()
        # existing name -> new name
        self.reverse_mapping = {old: new for (new, old) in column_remapping.items()}
        new_cols = list(column_remapping.keys())
        orig_cols = list(column_remapping.values())
        unknown = set(orig_cols).difference(source.column_names)
        if unknown:
            raise ValueError("Tried to rename unknown columns: " + str(unknown))
        # names that are both produced and consumed by the mapping may be reused
        reused = set(new_cols).intersection(orig_cols)
        collisions = (set(source.column_names) - reused).intersection(new_cols)
        if collisions:
            raise ValueError(
                "Mapping "
                + str(self.column_remapping)
                + " collides with existing columns "
                + str(collisions)
            )
        renamed = [self.reverse_mapping.get(name, name) for name in source.column_names]
        self.new_columns = set(new_cols) - set(orig_cols)
        ViewRepresentation.__init__(
            self,
            column_names=renamed,
            sources=[source],
            node_name="RenameColumnsNode",
        )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        rebuilt = [src.replace_leaves(replacement_map) for src in self.sources]
        return rebuilt[0].rename_columns(column_remapping=self.column_remapping)

    def _equiv_nodes(self, other):
        """Return True when other is a RenameColumnsNode with an equal column_remapping."""
        if isinstance(other, RenameColumnsNode):
            if self.column_remapping == other.column_remapping:
                return True
        return False

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction (in terms of this node's output names).
        :return: list of order sets (list parallel to sources), in terms of source names.
        """
        wanted = self.column_names if using is None else tuple(using)
        # translate new names back to the source names they were renamed from
        source_names = [self.column_remapping.get(name, name) for name in wanted]
        return [OrderedSet(source_names)]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step (and optionally its source).
        """
        prefix = ""
        if print_sources:
            upstream = self.sources[0].to_python_src_(indent=indent, strict=strict)
            padding = " " * (max(indent, 0) + 3)
            prefix = upstream + "\n" + padding
        return prefix + ".rename_columns(" + repr(self.column_remapping) + ")"

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        The work is delegated to db_model.rename_to_near_sql().

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        translate = db_model.rename_to_near_sql
        near_sql = translate(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )
        return near_sql


class NaturalJoinNode(ViewRepresentation):
    """
    Class representation of .natural_join() method/step.
    """

    on_a: List[str]
    on_b: List[str]
    jointype: str

    def __init__(
        self,
        a,
        b,
        *,
        on_a: List[str],
        on_b: List[str],
        jointype: str,
        check_all_common_keys_in_equi_spec: bool = False,
    ):
        """
        Build a join step.

        :param a: left operator DAG.
        :param b: right operator DAG.
        :param on_a: join key column names in a (parallel to on_b).
        :param on_b: join key column names in b (parallel to on_a).
        :param jointype: join type, normalized with data_algebra.expr_rep.standardize_join_type().
        :param check_all_common_keys_in_equi_spec: if True, require every column common to
               a and b to be named in both on_a and on_b.
        :raises ValueError: if a and b disagree on a shared table's description, or a
                            CROSS join is given join keys.
        :raises KeyError: if a join key is missing from its table, or the common-key check fails.
        """
        # check set of tables is consistent in both sub-dags
        # (raises ValueError on any shared table key with differing descriptions)
        a_tables = a.get_tables()
        b_tables = b.get_tables()
        _assert_tables_defs_consistent(a_tables, b_tables)
        assert isinstance(on_a, List)
        assert numpy.all([isinstance(v, str) for v in on_a])
        assert isinstance(on_b, List)
        assert numpy.all([isinstance(v, str) for v in on_b])
        assert len(on_a) == len(on_b)
        # check columns: all of a's columns, then b's columns not already seen
        column_names = list(a.column_names)
        columns_seen = set(column_names)
        for ci in b.column_names:
            if ci not in columns_seen:
                column_names.append(ci)
                columns_seen.add(ci)
        missing_left = set(on_a) - set(a.column_names)
        if len(missing_left) > 0:
            raise KeyError("left table missing join keys: " + str(missing_left))
        missing_right = set(on_b) - set(b.column_names)
        if len(missing_right) > 0:
            raise KeyError("right table missing join keys: " + str(missing_right))
        if check_all_common_keys_in_equi_spec:
            common_columns = set(a.column_names).intersection(b.column_names)
            common_on = set(on_a).intersection(on_b)
            missing_common = common_columns - common_on
            if len(missing_common) > 0:
                raise KeyError(
                    "check_all_common_keys_in_equi_spec set, and the following common keys are are not in the on-clause: "
                    + str(missing_common)
                )
        # try to re-use column names if possible, saves space in deeply nested join trees.
        column_names = tuple(column_names)
        if isinstance(a.column_names, tuple) and (
            set(column_names) == set(a.column_names)
        ):
            column_names = a.column_names
        elif isinstance(b.column_names, tuple) and (
            set(column_names) == set(b.column_names)
        ):
            column_names = b.column_names
        ViewRepresentation.__init__(
            self,
            column_names=column_names,
            sources=[a, b],
            node_name="NaturalJoinNode",
        )
        # keep private copies so later mutation of the caller's lists cannot alter the node
        self.on_a = list(on_a)
        self.on_b = list(on_b)
        self.jointype = data_algebra.expr_rep.standardize_join_type(jointype)
        if (self.jointype == "CROSS") and (len(self.on_a) != 0):
            raise ValueError("CROSS joins must have an empty 'on' list")

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        new_sources = [
            s.replace_leaves(replacement_map) for s in self.sources
        ]
        return new_sources[0].natural_join(
            b=new_sources[1],
            on=list(zip(self.on_a, self.on_b)),
            jointype=self.jointype,
        )

    def _equiv_nodes(self, other):
        """Return True when other is a NaturalJoinNode with equal on_a, on_b and jointype."""
        if not isinstance(other, NaturalJoinNode):
            return False
        if not self.on_a == other.on_a:
            return False
        if not self.on_b == other.on_b:
            return False
        if not self.jointype == other.jointype:
            return False
        return True

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: list of order sets (list parallel to sources).
        """
        if using is None:
            return [OrderedSet(self.sources[i].column_names) for i in range(2)]
        # join keys are always needed from their tables, even if not requested downstream
        using = using.union(self.on_a).union(self.on_b)
        return [
            ordered_intersect(self.sources[i].column_names, using) for i in range(2)
        ]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        When print_sources is False the two sources are shown as the placeholders
        _0 and _1.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step (and optionally its sources).
        """
        if print_sources:
            s = (
                self.sources[0].to_python_src_(indent=indent, strict=strict)
                + "\n"
                + " " * (max(indent, 0) + 3)
            )
        else:
            # placeholder without a trailing dot: the method text below supplies it
            s = "_0"
        s = s + (".natural_join(b=\n" + " " * (indent + 6))
        if print_sources:
            s = s + (
                self.sources[1].to_python_src_(indent=max(indent, 0) + 6, strict=strict)
                + ",\n"
                + " " * (max(indent, 0) + 6)
            )
        else:
            s = s + " _1, "
        on_arg = _convert_parallel_lists_to_on_clause(self.on_a, self.on_b)
        s = s + (
            "on=" + on_arg.__repr__() + ", jointype=" + self.jointype.__repr__() + ")"
        )
        return s

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        The work is delegated to db_model.natural_join_to_near_sql().

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        return db_model.natural_join_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )


class ConcatRowsNode(ViewRepresentation):
    """
    Class representation of .concat_rows() method/step.
    """

    id_column: Union[str, None]

    def __init__(self, a, b, *, id_column="table_name", a_name="a", b_name="b"):
        """
        Build a row-concatenation step.

        :param a: first operator DAG.
        :param b: second operator DAG; must have the same set of column names as a.
        :param id_column: name of an extra column appended to the result's columns,
                          or None for no extra column.
        :param a_name: stored on the node (presumably the id_column value for rows of a -- confirm).
        :param b_name: stored on the node (presumably the id_column value for rows of b -- confirm).
        :raises ValueError: if a and b disagree on a shared table's description, have
                            different column sets, or id_column is already a column name.
        """
        assert isinstance(a, ViewRepresentation)
        assert isinstance(b, ViewRepresentation)
        # check set of tables is consistent in both sub-dags
        # (raises ValueError on any shared table key with differing descriptions)
        a_tables = a.get_tables()
        b_tables = b.get_tables()
        _assert_tables_defs_consistent(a_tables, b_tables)
        sources = [a, b]
        # check columns
        if not set(sources[0].column_names) == set(sources[1].column_names):
            raise ValueError("a and b should have same set of column names")
        if id_column is not None and id_column in sources[0].column_names:
            raise ValueError("id_column should not be an input table column name")
        # result columns follow a's order, with the id column (if any) last
        column_names = list(sources[0].column_names)
        if id_column is not None:
            assert id_column not in column_names
            column_names.append(id_column)
        ViewRepresentation.__init__(
            self, column_names=column_names, sources=sources, node_name="ConcatRowsNode"
        )
        self.id_column = id_column
        self.a_name = a_name
        self.b_name = b_name

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        new_sources = [
            s.replace_leaves(replacement_map) for s in self.sources
        ]
        return new_sources[0].concat_rows(
            b=new_sources[1],
            id_column=self.id_column,
            a_name=self.a_name,
            b_name=self.b_name,
        )

    def _equiv_nodes(self, other):
        """Return True when other is a ConcatRowsNode with equal id_column, a_name and b_name."""
        if not isinstance(other, ConcatRowsNode):
            return False
        if not self.id_column == other.id_column:
            return False
        if not self.a_name == other.a_name:
            return False
        if not self.b_name == other.b_name:
            return False
        return True

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction.
        :return: list of order sets (list parallel to sources).
        """
        if using is None:
            return [OrderedSet(self.sources[i].column_names) for i in range(2)]
        return [
            ordered_intersect(self.sources[i].column_names, using) for i in range(2)
        ]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        When print_sources is False the two sources are shown as the placeholders
        _0 and _1.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: Python source text for this step (and optionally its sources).
        """
        if print_sources:
            s = (
                self.sources[0].to_python_src_(indent=indent, strict=strict)
                + "\n"
                + " " * (max(indent, 0) + 3)
            )
        else:
            # placeholder without a trailing dot: the method text below supplies it
            s = "_0"
        s = s + (".concat_rows(b=\n" + " " * (indent + 6))
        if print_sources:
            s = s + (
                self.sources[1].to_python_src_(indent=max(indent, 0) + 6, strict=strict)
                + ",\n"
                + " " * (max(indent, 0) + 6)
            )
        else:
            s = s + " _1, "
        s = s + (
            "id_column="
            + self.id_column.__repr__()
            + ", a_name="
            + self.a_name.__repr__()
            + ", b_name="
            + self.b_name.__repr__()
            + ")"
        )
        return s

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        The work is delegated to db_model.concat_rows_to_near_sql().

        :param db_model: database model
        :param using: optional column restriction set
        :param temp_id_source: source of temporary ids
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        return db_model.concat_rows_to_near_sql(
            self,
            using=using,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )


class ConvertRecordsNode(ViewRepresentation):
    """
    Operator node for the .convert_records() method/step.

    Holds a record_map; the node's output columns are record_map.columns_produced.
    """

    def __init__(self, *, source, record_map):
        """
        :param source: input operator node
        :param record_map: record transform; must expose columns_needed and columns_produced
        :raises ValueError: if record_map needs columns the source does not have
        """
        self.record_map = record_map
        missing = set(self.record_map.columns_needed) - set(source.column_names)
        if missing:
            raise ValueError("missing required columns: " + str(missing))
        ViewRepresentation.__init__(
            self,
            column_names=record_map.columns_produced,
            sources=[source],
            node_name="ConvertRecordsNode",
        )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: new operator DAG
        """
        assert isinstance(replacement_map, dict)
        rebuilt = []
        for src in self.sources:
            rebuilt.append(src.replace_leaves(replacement_map))
        # re-apply this step on top of the rebuilt input
        first = rebuilt[0]
        return first.convert_records(record_map=self.record_map)

    def _equiv_nodes(self, other):
        # equivalent when the other node is the same kind and carries an equal record_map
        if isinstance(other, ConvertRecordsNode) and self.record_map == other.record_map:
            return True
        return False

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction (not consulted by this node).
        :return: one-element list holding record_map.columns_needed (list parallel to sources).
        """
        needed = self.record_map.columns_needed
        return [needed]

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting.
        :param strict: if False allow eliding of columns names and other long structures.
        :param print_sources: logical, print children.
        :return: text of the .convert_records() step.
        """
        lead = ""
        if print_sources:
            source_text = self.sources[0].to_python_src_(indent=indent, strict=strict)
            lead = source_text + "\n" + " " * (max(indent, 0) + 3)
        # continuation lines of the record map repr are shifted right by three spaces
        rm_str = self.record_map.__repr__().replace("\n", "\n   ")
        return lead + ".convert_records(" + rm_str + ")"

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        :param db_model: database model
        :param using: optional column restriction set (the source is always asked for all columns)
        :param temp_id_source: source of temporary ids (one-element list counter; created if None)
        :param sql_format_options: options for sql formatting
        :return: data_algebra.near_sql.NearSQL
        """
        if temp_id_source is None:
            temp_id_source = [0]

        def wrap_step(inner, name_stem, build_pair, record_spec, note):
            # Wrap `inner` in one raw-query step; consumes one temporary id.
            view_name = name_stem + str(temp_id_source[0])
            temp_id_source[0] = temp_id_source[0] + 1
            pi, si = build_pair(record_spec=record_spec)
            wrapped = data_algebra.near_sql.NearSQLRawQStep(
                prefix=pi,
                query_name=view_name,
                quoted_query_name=db_model.quote_identifier(view_name),
                sub_sql=data_algebra.near_sql.NearSQLContainer(near_sql=inner),
                suffix=si,
                annotation=note,
                ops_key=None,
            )
            assert isinstance(wrapped, data_algebra.near_sql.NearSQL)
            return wrapped

        # claims to use all columns
        near_sql = self.sources[0].to_near_sql_implementation_(
            db_model=db_model,
            using=None,
            temp_id_source=temp_id_source,
            sql_format_options=sql_format_options,
        )
        assert isinstance(near_sql, data_algebra.near_sql.NearSQL)
        if self.record_map.blocks_in is not None:
            near_sql = wrap_step(
                near_sql,
                "convert_records_blocks_in_",
                db_model.blocks_to_row_recs_query_str_list_pair,
                self.record_map.blocks_in,
                "convert records blocks in",
            )
        if self.record_map.blocks_out is not None:
            near_sql = wrap_step(
                near_sql,
                "convert_records_blocks_out_",
                db_model.row_recs_to_blocks_query_str_list_pair,
                self.record_map.blocks_out,
                "convert records blocks out",
            )
        return near_sql


class SQLNode(ViewRepresentation):
    """
    Class representation of user SQL step in pipeline. Can be used to start a pipeline instead of a TableDescription.
    """

    def __init__(
        self, *, sql: Union[str, List[str]], column_names: List[str], view_name: str
    ):
        """
        :param sql: user SQL, either one string (split into its non-blank lines)
               or a non-empty list of strings (used as given)
        :param column_names: names of the columns the SQL produces
        :param view_name: name used for this query; also the key looked up in replace_leaves()
        """
        if isinstance(sql, str):
            # normalize a single string to a list of its non-blank lines
            sql = sql.splitlines(keepends=False)
            sql = [v for v in sql if len(v.strip()) > 0]
        assert isinstance(sql, list)
        assert len(sql) > 0
        assert all([isinstance(v, str) for v in sql])
        assert isinstance(view_name, str)
        self.sql = sql.copy()  # private copy, so later caller edits do not leak in
        self.view_name = view_name
        ViewRepresentation.__init__(
            self,
            column_names=column_names,
            node_name="SQLNode",
        )

    def replace_leaves(self, replacement_map: Dict[str, Any]):
        """
        Replace leaves of DAG

        :param replacement_map: table/sqlkeys mapped to replacement Operator platforms
        :return: the replacement registered under this node's view_name if there is one,
                 otherwise a fresh copy of this node
        """
        assert isinstance(replacement_map, dict)
        try:
            return replacement_map[self.view_name]
        except KeyError:
            pass
        # copy self
        r = SQLNode(
            sql=self.sql.copy(),
            # list(...) rather than .copy(): copies any sequence, so this keeps
            # working if the stored column_names is a tuple (tuples have no .copy()).
            column_names=list(self.column_names),
            view_name=self.view_name,
        )
        return r

    def _equiv_nodes(self, other):
        # equivalent only when kind, view name, column names and SQL lines all agree
        if not isinstance(other, SQLNode):
            return False
        if self.view_name != other.view_name:
            return False
        if self.column_names != other.column_names:
            return False
        if self.sql != other.sql:
            return False
        return True

    def get_tables(self):
        """Get a dictionary of all tables used in an operator DAG.
        A SQLNode reports no tables, so this is always an empty dictionary."""
        return dict()

    def columns_used_from_sources(self, using: Optional[set] = None) -> List:
        """
        Get columns used from sources. Internal method.

        :param using: optional column restriction (not consulted by this node).
        :return: empty list.
        """
        return []

    def to_python_src_(self, *, indent=0, strict=True, print_sources=True):
        """
        Return text representing operations.

        :param indent: additional indent to apply in formatting (not used by this node).
        :param strict: if False allow eliding of columns names and other long structures
               (not used by this node).
        :param print_sources: logical, print children (not used by this node).
        :return: text of a SQLNode(...) constructor call.
        """
        s = (
            "SQLNode(sql="
            + str(self.sql)
            + ", column_names="
            + str(self.column_names)
            + ", view_name="
            + self.view_name.__repr__()
            + ")"
        )
        return s

    def to_near_sql_implementation_(
        self, db_model, *, using, temp_id_source, sql_format_options=None
    ) -> data_algebra.near_sql.NearSQL:
        """
        Convert operator dag into NearSQL type for translation to SQL string.

        :param db_model: database model (used here to quote the view name)
        :param using: optional column restriction set (not consulted by this node)
        :param temp_id_source: source of temporary ids (not consulted by this node)
        :param sql_format_options: options for sql formatting (not consulted by this node)
        :return: data_algebra.near_sql.NearSQL
        """
        quoted_query_name = db_model.quote_identifier(self.view_name)
        # the user's SQL lines are passed through as the step prefix
        near_sql = data_algebra.near_sql.NearSQLRawQStep(
            prefix=self.sql,
            query_name=self.view_name,
            quoted_query_name=quoted_query_name,
            sub_sql=None,
            suffix=None,
            annotation="user supplied SQL",
            add_select=False,
            ops_key=None,
        )
        return near_sql


def ex(d, *, data_model=None, allow_limited_tables: bool = False):
    """
    Function form of d.ex(): evaluate operators with respect to Pandas data frames
    already stored in the operator chain.

    :param d: data algebra pipeline/DAG to evaluate.
    :param data_model: adaptor to data dialect
    :param allow_limited_tables: logical, if True allow execution on non-complete tables
    :return: table result
    """
    # forward both options by keyword to the pipeline's own ex() method
    options = {
        "data_model": data_model,
        "allow_limited_tables": allow_limited_tables,
    }
    return d.ex(**options)
