# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_utils.ipynb.

# %% auto 0
# Public names exported by this module when it is star-imported.
__all__ = ['parse_code_if_in_regex', 'comment_regex', 'delimit_regex', 'multi_regex', 'parse_code_if_in', 'in_range',
           'is_cr_delimiter', 'ending_delimiter', 'standardize_code', 'is_start_of_program_block',
           'break_out_prog_blocks', 'HiddenPrints', 'print_red']

# %% ../nbs/01_utils.ipynb 4
import re
import sys
import os

# %% ../nbs/01_utils.ipynb 6
# Splits a whole string into three optional named groups: `code` (which may
# not begin with an `if`/`in` keyword), an `if` clause and an `in` clause.
# DOTALL lets `.` span newlines, so multi-line input is handled as one unit.
parse_code_if_in_regex = re.compile(
    r'\A(?P<code>(?!if\s)(?!\sif)(?!in\s)(?!\sin).+?)?(?P<if>\s*if\s+.+?)?(?P<in>\s*in\s.+?)?\Z',
    flags=re.DOTALL | re.MULTILINE,
)

# %% ../nbs/01_utils.ipynb 7
def parse_code_if_in(code):
    """Split a Stata command into its `code`, `if` and `in` parts.

    Returns a dict with the keys 'code', 'if' and 'in'. A part that is
    absent from the (whitespace-stripped) input is an empty string.
    """
    found = parse_code_if_in_regex.match(code.strip())
    if not found:
        # No match: hand the input back untouched as the code part.
        return {'code': code, 'if': '', 'in': ''}
    # Groups that did not participate in the match come back as ''.
    return found.groupdict(default='')

# %% ../nbs/01_utils.ipynb 11
def in_range(stata_in_code, count=None):
    """Return the range described by a Stata `in` clause as `(start, end)`.

    The result is zero-based and end-exclusive: `' in 5/10'` gives `(4, 10)`.

    Args:
        stata_in_code: The `in` clause, with or without the leading `in`
            keyword and surrounding whitespace (e.g. `' in 1/5'`, `'f/10'`).
        count: Value substituted for an upper bound of `l`/`L`. May be an
            int or a zero-argument callable returning one. Only consulted
            when the upper bound is `l`.

    Returns:
        `(start - 1, end)` as ints, or `(None, None)` when the clause
        contains no `/`.

    Raises:
        ValueError: If a bound is not an integer (nor `f`/`l`), or if the
            upper bound is `l` and `count` was not supplied.
    """
    # Drop the leading `in` keyword regardless of the whitespace around it.
    range_code = re.sub(r'\A\s*in\s+', '', stata_in_code).strip()
    start, slash, end = range_code.partition('/')
    if not slash:
        return (None, None)
    start, end = start.strip(), end.strip()
    if start.lower() == 'f':
        start = 1
    if end.lower() == 'l':
        if count is None:
            raise ValueError(
                "an upper bound of 'l' requires the `count` argument"
            )
        end = count() if callable(count) else count
    return (int(start) - 1, int(end))

# %% ../nbs/01_utils.ipynb 15
# Matches the two comment forms that can span lines: a `///` continuation
# (preceded by a space or tab, up to and including the line break) and a
# `/* ... */` block comment.
comment_regex = re.compile(r'(((?: |\t)\/\/\/)(.)*(\n|\r)|(\/\*)(.|\s)*?(\*\/))')

def _remove_multi_line_comments(code):
    """Return `code` with every `comment_regex` match replaced by one space."""
    without_comments = comment_regex.sub(' ', code)
    return without_comments

# %% ../nbs/01_utils.ipynb 21
def is_cr_delimiter(delimiter):
    """Return True when `delimiter` is `'cr'` or `None`, else False."""
    return delimiter is None or delimiter == 'cr'

# %% ../nbs/01_utils.ipynb 23
# Matches a `#delimit` directive and captures the remainder of its line.
delimit_regex = re.compile(r'#delimit(.*$)', flags=re.MULTILINE)


def _replace_delimiter(code, starting_delimiter=None):
    """Rewrite `;`-delimited stretches of `code` as newline-delimited code.

    The code is cut at each `#delimit` directive (the directive itself is
    dropped). In every stretch governed by a non-`cr` delimiter, line breaks
    are removed and each `;` becomes a newline. Stretches governed by the
    `cr` delimiter are kept as they are. `starting_delimiter` applies to the
    text before the first directive.
    """
    pieces = []
    delimiter = starting_delimiter
    remaining = code
    while True:
        parts = delimit_regex.split(remaining.strip(), maxsplit=1)
        has_directive = len(parts) == 3
        # Without a further directive the rest is used as-is (unstripped).
        segment = parts[0] if has_directive else remaining
        if not is_cr_delimiter(delimiter):
            segment = segment.replace('\r', '').replace('\n', '')
            segment = segment.replace(';', '\n')
        pieces.append(segment)
        if not has_directive:
            break
        # The captured text after `#delimit` governs what follows.
        delimiter = parts[1].strip()
        remaining = parts[2]
    return ''.join(pieces)

# %% ../nbs/01_utils.ipynb 25
def ending_delimiter(code, starting_delimiter=None):
    """Return the delimiter in effect after the last line of `code`.

    Args:
        code: Stata code, possibly containing `#delimit` directives.
        starting_delimiter: Delimiter in effect before `code` starts.

    Returns:
        `None` when the final delimiter is the carriage-return default
        (see `is_cr_delimiter`), otherwise `';'`.
    """
    delimiter = starting_delimiter
    remaining = code
    while True:
        # Directives inside multi-line comments must not count.
        remaining = _remove_multi_line_comments(remaining)
        # With one capture group and maxsplit=1, split yields either
        # [text] (no directive) or [before, captured, after].
        split = delimit_regex.split(remaining.strip(), maxsplit=1)
        if len(split) != 3:
            break
        delimiter = split[1].strip()
        remaining = split[2]
    return None if is_cr_delimiter(delimiter) else ';'

# %% ../nbs/01_utils.ipynb 28
# Matches a run of one or more consecutive spaces
multi_regex = re.compile(r' +')

def standardize_code(code, starting_delimiter=None):
    """Remove comments spanning multiple lines and replace custom delimiters

    Also collapses runs of spaces, strips each line, and drops blank lines.
    """
    # Comments go first because they may themselves contain "#delimit;"
    without_comments = _remove_multi_line_comments(code)
    newline_delimited = _replace_delimiter(without_comments, starting_delimiter)

    # Collapse each run of spaces into a single space
    single_spaced = multi_regex.sub(' ', newline_delimited)

    # Trim every line and keep only the non-empty ones
    trimmed_lines = (line.strip() for line in single_spaced.splitlines())
    return '\n'.join(line for line in trimmed_lines if line)

# %% ../nbs/01_utils.ipynb 36
def _startswith_stata_abbrev(string, full_command, shortest_abbrev):
    """Return True if `string` begins with an abbreviation of `full_command`.

    An abbreviation is any prefix of `full_command` at least as long as
    `shortest_abbrev` (only its length is used). It must be followed by a
    single space in `string`.
    """
    min_length = len(shortest_abbrev)
    return any(
        string.startswith(full_command[:length] + ' ')
        for length in range(min_length, len(full_command) + 1)
    )

# %% ../nbs/01_utils.ipynb 39
def _remove_prog_prefixes(cs):
    """Strip leading `quietly`, `capture` and `noisily` prefixes from `cs`.

    Each prefix may be abbreviated (down to `qui`, `cap` and `n`) and
    several may be stacked, e.g. `'cap qui program define foo'`.

    Returns the remainder of the line after all such prefixes. Returns an
    empty string when nothing follows the last prefix.
    """
    while (_startswith_stata_abbrev(cs, 'quietly', 'qui')
           or _startswith_stata_abbrev(cs, 'capture', 'cap')
           or _startswith_stata_abbrev(cs, 'noisily', 'n')):
        parts = cs.split(None, maxsplit=1)
        if len(parts) < 2:
            # Prefix followed only by whitespace: nothing is left over.
            return ''
        cs = parts[1]
    return cs

# %% ../nbs/01_utils.ipynb 41
def is_start_of_program_block(std_code_line):
    """Return True if the line opens a program, mata or python block.

    Args:
        std_code_line: One line of standardized code. Leading
            `quietly`/`capture`/`noisily` prefixes are ignored.

    Returns:
        True for a bare `mata`, `mata:`, `python` or `python:` line. Also
        True for a `program` command (abbreviable down to `pr`) that is not
        one of the `dir`, `drop` or `list` subcommands. Otherwise False.
    """
    cs = _remove_prog_prefixes(std_code_line)
    if cs in {'mata', 'mata:', 'python', 'python:'}:
        return True
    if not _startswith_stata_abbrev(cs, 'program', 'pr'):
        return False

    # words[0] is the (possibly abbreviated) `program` keyword.
    words = cs.split()
    subcommand = words[1] if len(words) > 1 else ''
    # `program dir` (or `di`) takes no arguments.
    if len(words) == 2 and subcommand in {'di', 'dir'}:
        return False
    if subcommand == 'drop':
        return False
    # `list` may be abbreviated down to `l`, with or without program names.
    if subcommand and 'list'.startswith(subcommand):
        return False
    return True

# %% ../nbs/01_utils.ipynb 43
def break_out_prog_blocks(code, starting_delimiter=None):
    """Split `code` into consecutive blocks of standardized lines.

    Returns a list of dicts with keys "is_prog" and "std_code". A block is
    closed with "is_prog" True at each `end` line. Lines pending when a
    program block starts, or when the input runs out, are emitted with
    "is_prog" False.
    """
    pending = []
    result = []
    for line in standardize_code(code, starting_delimiter).splitlines():
        # Flush whatever precedes the start of a program block
        if pending and is_start_of_program_block(line):
            result.append({"is_prog": False, "std_code": '\n'.join(pending)})
            pending = []

        pending.append(line)

        # An `end` line closes the block collected so far
        if line == 'end':
            result.append({"is_prog": True, "std_code": '\n'.join(pending)})
            pending = []

    if pending:
        result.append({"is_prog": False, "std_code": '\n'.join(pending)})
    return result

# %% ../nbs/01_utils.ipynb 46
class HiddenPrints:
    """A context manager for suppressing `print` output

    On entry `sys.stdout` is pointed at the null device. On exit the stream
    that was active at entry is restored and the null-device handle closed.
    """
    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own handle so __exit__ closes exactly the file opened
        # here, even if code inside the block reassigns sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is never left pointing at a closed file.
        sys.stdout = self._original_stdout
        self._devnull.close()

# %% ../nbs/01_utils.ipynb 49
def print_red(text):
    """Print `text` wrapped in the ANSI escape codes `\\x1b[31m` / `\\x1b[0m`."""
    wrapped = f"\x1b[31m{text}\x1b[0m"
    print(wrapped)
