import logging

import numpy as np

logger = logging.getLogger(__name__)


def flatten_arrays_for_duckdb(df):
    """
    Convert, in place, every column of ``df`` that holds Python lists into a column of strings.

    DuckDB doesn't support creating columns of arrays. It returns the values always as NaN. So, as a workaround, convert
    all array columns to string.

    The docs aren't clear to me, so this understanding may not be entirely correct. References:
    - https://duckdb.org/docs/sql/data_types/nested
    - https://github.com/duckdb/duckdb/issues/1421

    A column is converted when its dtype is ``object`` and at least one of its values is a ``list``. The whole column
    is cast with ``astype(str)``, so any non-list values sharing that column are stringified as well. A single warning
    naming the converted columns is logged when anything was converted.

    :param df: DataFrame to fix up; it is modified in place.
    :return: None
    """
    # A list (rather than a set) keeps the names in frame order, so the log message is deterministic.
    flattened = []
    for col_name, dtype in df.dtypes.items():
        # "Object" type. anything non-numeric, or of mixed-type, is type Object in pandas. So we need to further
        # specifically inspect for arrays.
        if dtype == np.dtype('O') and df[col_name].apply(lambda x: isinstance(x, list)).any():
            df[col_name] = df[col_name].astype(str)
            flattened.append(col_name)
    if flattened:
        # Column labels are not necessarily strings (e.g. default integer labels), so stringify them before joining.
        logger.warning(
            'Flattened some columns into strings for in-memory query: %s',
            ', '.join(map(str, flattened)),
        )


def query_in_memory(df, query):
    """
    Run a SQL query against ``df`` using a throwaway in-memory DuckDB database.

    The frame is registered with DuckDB under the name ``data``, so ``query`` should read from ``data``.

    Note that ``df`` is modified in place before it is queried:
    - columns containing lists are converted to strings (see ``flatten_arrays_for_duckdb``);
    - if a ``log_timestamp`` column exists, its string values are parsed with ``ciso8601.parse_datetime``;
    - if a ``raw.synthesis.networkinformation.rssi`` column exists, its values are converted with ``int``, where
      ``None``, ``''`` and NaN are treated as missing.

    :param df: DataFrame to query.
    :param query: SQL text, handed to DuckDB as-is.
    :return: the query result as a new DataFrame.
    """
    import duckdb

    flatten_arrays_for_duckdb(df)

    if 'log_timestamp' in df.columns:
        import ciso8601

        def parse_timestamp(value):
            # Only strings are parsed. Because df is modified in place, a second query on the same frame sees values
            # that were already parsed; those are passed through untouched instead of crashing the parser.
            return ciso8601.parse_datetime(value) if isinstance(value, str) else value

        # TODO: This is a hack. I'm leaving it here as a clear demonstration of missing feature requirements
        # (schema management & schema coercion). Similarly to coercing types at this point, we should be able to force
        # existence of columns (set to all NULL if no data available).
        # Mapping over the column (rather than applying over rows) also works when the frame has no rows.
        df['log_timestamp'] = df['log_timestamp'].map(parse_timestamp)

    rssi_column = 'raw.synthesis.networkinformation.rssi'
    if rssi_column in df.columns:
        def to_int_or_none(value):
            # NaN is how pandas usually marks an absent value; int(NaN) would raise.
            if value in (None, '') or (isinstance(value, float) and value != value):
                return None
            return int(value)

        df[rssi_column] = df[rssi_column].map(to_int_or_none)

    duck_conn = duckdb.connect(database=':memory:', read_only=False)
    try:
        duck_conn.register('data', df)
        duck_conn.execute(query)
        return duck_conn.fetchdf()
    finally:
        # Release the in-memory database even when the query fails.
        duck_conn.close()
