# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/10_completion_env.ipynb.

# %% auto 0
__all__ = ['stata_lexer', 'CompletionEnv', 'Env']

# %% ../nbs/10_completion_env.ipynb 4
from .code_utils import ending_sc_delimiter
from fastcore.basics import patch_to
from enum import IntEnum
from typing import Tuple
import re

# %% ../nbs/10_completion_env.ipynb 5
from pygments import lexers
from pygments.token import Comment, Keyword, Name, Number, \
    String, Text, Operator

# %% ../nbs/10_completion_env.ipynb 6
# Pygments lexer for Stata, looked up by name once at import time and
# reused by `_lex_tokens` for every piece of code that needs lexing.
stata_lexer = lexers.get_lexer_by_name('stata')

def _lex_tokens(code):
    """Lex `code` with the module-level Stata lexer and return the tokens as a list.

    The items come from `get_tokens_unprocessed`; elsewhere in this file
    they are indexed as `token[0]` (start index), `token[1]` (token type)
    and `token[2]` (text).
    """
    token_stream = stata_lexer.get_tokens_unprocessed(code)
    return [*token_stream]

# %% ../nbs/10_completion_env.ipynb 7
def _last_token(code):
    """Return `(index, tokentype, value)` for the trailing run of same-type tokens.

    `code` is lexed with `_lex_tokens`; all consecutive tokens at the end
    that share the type of the very last token are merged into one.

    Returns
    -------
    index : int
        Smallest start index among the merged tokens.
    tokentype
        Token type of the last token, or `None` when nothing was lexed.
    value : str
        Concatenated text of the merged tokens.
    """
    tokens = _lex_tokens(code)
    if not tokens:
        # Nothing was lexed (e.g. empty `code`): return the same empty
        # result `_last_token_full_string` uses instead of raising
        # IndexError on `tokens[-1]`.
        return (0, None, "")
    last_tokentype = tokens[-1][1]
    trailing_run = []
    # Walk backwards, collecting tokens until the type changes.
    for token in reversed(tokens):
        if token[1] is not last_tokentype:
            break
        trailing_run.append(token)
    trailing_run.reverse()  # restore source order before joining the text
    start_index = min(token[0] for token in trailing_run)
    value = "".join(token[2] for token in trailing_run)
    return (start_index, last_tokentype, value)

# %% ../nbs/10_completion_env.ipynb 8
def _last_token_full_string(code, sc_delimiter=False):
    """Return `(index, tokentype, value)` for the last token, extending strings backwards.

    Works like `_last_token`, but when the trailing token run is a String
    whose text does not start with `"` or `` `" ``, earlier tokens are
    prepended until the loop below hits one of its stop conditions.

    When `sc_delimiter` is true, `code` is lexed with a `#delimit;` line
    prepended; the returned `index` is shifted back so that it is relative
    to the `code` that was passed in.

    Returns `(0, None, "")` for empty `code`.
    """
    if not code:
        return (0, None, "")
    prefix = ""
    if sc_delimiter:
        # Lex as if a `#delimit;` line preceded the code.
        prefix = "#delimit;\n"
        orig_code = code
        code = prefix + orig_code
    tokens = _lex_tokens(code)
    last_tokentype = tokens[-1][1]
    tokens_to_combine = []
    # Collect the trailing run of tokens sharing the last token's type.
    for token in reversed(tokens):
        if token[1] is last_tokentype:
            tokens_to_combine.append(token)
        else:
            break
    tokens_to_combine = list(reversed(tokens_to_combine))
    index = min(tokens_to_combine, key=lambda t: t[0])[0]
    value = "".join([t[2] for t in tokens_to_combine])
    # A String run that does not begin with an opening quote marker is
    # extended backwards by re-lexing the code that precedes it.
    # NOTE(review): if an iteration appends no tokens, `index` and `value`
    # are unchanged and the condition is re-tested as-is -- confirm this
    # cannot loop forever on real lexer output.
    while last_tokentype == String and value[0] != '"' and value[0:2] != '`"':
        # Flip to newest-first so that earlier tokens can be appended.
        tokens_to_combine = list(reversed(tokens_to_combine))
        reversed_remaining_tokens = list(reversed(_lex_tokens(code[:index])))
        for i, token in enumerate(reversed_remaining_tokens):
            # Stop at a non-String token whose previously visited neighbour
            # was a String.
            # NOTE(review): for i == 0, `i-1` is -1 and so refers to the
            # last element of the reversed list -- verify this is intended.
            if (token[1] is not String 
                and reversed_remaining_tokens[i-1][1] is String):
                break
            # Also stop at any comment token.
            elif token[1] in [Comment.Single, Comment.Multiline, Comment.Special]:
                break
            tokens_to_combine.append(token)
        # Back to source order, then recompute start index and text.
        tokens_to_combine = list(reversed(tokens_to_combine))
        index = min(tokens_to_combine, key=lambda t: t[0])[0]
        value = "".join([t[2] for t in tokens_to_combine])
    # Make the index relative to the caller's `code` (without the prefix).
    index = index-len(prefix)
    if index < 0:
        # The combined value started inside the prefix: drop that part.
        value = value[-index:]
        index = 0
    return (index, last_tokentype, value)

# %% ../nbs/10_completion_env.ipynb 28
class CompletionEnv:
    """Bundle of pre-compiled regex searchers used to classify what is being completed."""

    def __init__(self):
        """Compile each pattern once and store its bound `search`/`match` method."""
        multiline = re.MULTILINE

        # One non-word character followed by the (possibly empty) run of
        # word characters that ends the string.
        self.last_word = re.compile(r'\W\w*?\Z', flags=multiline).search

        # One whitespace, `"` or `=` followed by the trailing 'chunk' that
        # contains none of those characters.
        self.last_chunk = re.compile(
            r'[\s"=][^\s"=]*?\Z', flags=multiline).search

        # Path completion: text up to and including a `"` is 'fluff', the
        # quote-free remainder is the path.
        self.path_search = re.compile(
            r'^(?P<fluff>.*")(?P<path>[^"]*)\Z').search

        # Magic completion: optional leading `*`, then `%` and the
        # whitespace-free magic name.
        self.magic_completion = re.compile(
            r'\A\*?%(?P<magic>\S*)\Z', flags=re.DOTALL | multiline).match

        # Abbreviations of cap(ture), qui(etly) and n(oisily) that may
        # precede the first word of a statement.
        prefix_cmd = "(" + "|".join((
            r'cap(t|tu|tur|ture)?',
            r'qui(e|et|etl|etly)?',
            r'n(o|oi|ois|oisi|oisil|oisily)?',
        )) + ")"

        # Function context: matches constructs like
        #
        #     (`=)?scalar(
        function_pattern = (
            r"(\s+|\=|`=)\s*(?P<name>\w+?)\([^\)]*?(?P<last_word>\w*)\Z")
        self.fcontext = {
            'function': re.compile(function_pattern, flags=multiline).search,
        }

        # Line context: used to determine the first word of the last line
        # (after any prefix commands), e.g. matrix or scalar. The
        # 'delimit_line' variant treats `;` as the statement boundary.
        line_pattern = (
            r"^(?P<last_line>\s*(" + prefix_cmd
            + r"\s+)*(?P<first_word>\S+) .*?)\Z")
        delimit_line_pattern = (
            r"(?:\A|;)(?P<last_line>\s*(" + prefix_cmd
            + r"\s+)*(?P<first_word>[^\s;]+)\s[^;]*?)\Z")
        self.context = {
            'line': re.compile(line_pattern, flags=multiline).search,
            'delimit_line': re.compile(
                delimit_line_pattern, flags=multiline).search,
        }
#         self.last_line = {
#             'line':
#                 re.compile(
#                     r"^(?P<last_line>.*)\Z",
#                     **kwargs).search,
#             'delimit_line':
#                 re.compile(
#                     r"(?:\A|;)(?P<last_line>[^;]*)\Z",
#                     **kwargs).search
#         }

#         self.ends_in_a_comment = re.compile(
#             r'('
#             r'(^((\s*\*)|((.*( |\t))?\/\/)).*)'     # last line starting with '*' or containing ' //'
#             r'|(\/\*)([^\*\/]|\*(?!\/)|\/(?<!\*))*' # unfinished /* block
#             r')\Z', flags=re.MULTILINE).search

#         self.ends_in_a_string_literal = re.compile(
#             r'(\`\")' # start of a `" block
#             r'(' 
#             r'([^\"\']|\"(?!\')|\'(?<!\"))*' 
#             r'([^\"\']|\"(?!\')|\'(?<!\"))*' 
#             r'([^\"\']|\"(?!\')|\'(?<!\"))*' 
#             r')*\Z').search

# %% ../nbs/10_completion_env.ipynb 29
def _ends_in_string_literal(code, sc_delimiter=False):
    """Return True when the last token of `code` is lexed as a String.

    With `sc_delimiter` set, `code` is lexed with a `#delimit;` line
    prepended.
    """
    to_lex = "#delimit;\n" + code if sc_delimiter else code
    _, token_type, _ = _last_token(to_lex)
    return token_type is String

# %% ../nbs/10_completion_env.ipynb 30
def _ends_in_a_comment(code, sc_delimiter=False):
    """Return True when the last token of `code` is one of the comment token types.

    With `sc_delimiter` set, `code` is lexed with a `#delimit;` line
    prepended.
    """
    to_lex = "#delimit;\n" + code if sc_delimiter else code
    _, token_type, _ = _last_token(to_lex)
    comment_types = (Comment.Single, Comment.Multiline, Comment.Special)
    return token_type in comment_types

# %% ../nbs/10_completion_env.ipynb 37
@patch_to(CompletionEnv)
def _scalar_f_pos_rcomp(self, code, r2chars):
    """Detect an unfinished `scalar(` call at the end of `code`.

    Returns `(True, pos, rcomp)` when the function-context regex matches
    and the function name is `scalar`; `pos` is where the word being
    completed starts and `rcomp` is `")"` unless `r2chars` already
    supplies the closing parenthesis. Otherwise returns
    `(False, None, None)`.
    """
    match = self.fcontext['function'](code)
    if not match or match.group('name') != 'scalar':
        return False, None, None

    word_start = match.start('last_word')
    pos = word_start if word_start else len(code)
    # No closing paren needed if one follows directly or after one space.
    already_closed = r2chars[0:1] == ")" or r2chars == " )"
    rcomp = "" if already_closed else ")"
    return True, pos, rcomp

# %% ../nbs/10_completion_env.ipynb 43
@patch_to(CompletionEnv)
def _start_of_last_chunk(self, code):
    """Return the index just after the delimiter found by `last_chunk`, or 0 if none matches."""
    found = self.last_chunk(code)
    if not found:
        return 0
    # The match begins on the delimiter itself; the chunk starts one later.
    return found.start() + 1

# %% ../nbs/10_completion_env.ipynb 46
@patch_to(CompletionEnv)
def _start_of_last_word(self, code):
    """Return the index just after the non-word character found by `last_word`, or 0 if none matches."""
    found = self.last_word(code)
    if not found:
        return 0
    # The match begins on the non-word character; the word starts one later.
    return found.start() + 1

# %% ../nbs/10_completion_env.ipynb 49
@patch_to(CompletionEnv)
def _last_line_first_word(self, code, sc_delimiter=False):
    """Return `(last_line, first_word)` from the matching line-context regex.

    The `delimit_line` pattern is used when `sc_delimiter` is true, the
    `line` pattern otherwise. Returns `(None, None)` when the pattern does
    not match.
    """
    pattern_key = 'delimit_line' if sc_delimiter else 'line'
    found = self.context[pattern_key](code)
    if not found:
        return None, None
    return found.group('last_line'), found.group('first_word')

# %% ../nbs/10_completion_env.ipynb 55
class Env(IntEnum):
    """Completion environments, as returned in the first element of `get_env`'s result.

    Each member's comment describes what is being completed and, where
    given, how the completion is finished.
    """
    NONE = -9      # no suggestions
    MAGIC = -1     # magics, %x*
    GENERAL = 0    # varlist and/or file path
    LOCAL = 1      # `x* completed with `x*'
    GLOBAL = 2     # $x* completed with $x* or ${x* completed with ${x*}
    SCALAR = 4     # scalar .* x* completed with x* or scalar(x* completed with scalar(x*)
    MATRIX = 6     # matrix .* x* completed with x*
    SCALAR_VAR = 7 # scalars and varlist, scalar .* = x* completed with x*
    MATRIX_VAR = 8 # matrices and varlist, matrix .* = x* completed with x*
    MATA = 9       # inline or in mata environment
    STRING = 10    # file path

# %% ../nbs/10_completion_env.ipynb 56
@patch_to(CompletionEnv)
def get_env(self, 
            code: str, # Right-truncated to cursor position
            r2chars: str, # The two characters immediately after `code`, used to accurately determine rcomp
            sc_delimiter, # Delimiter state before `code`; refreshed via `ending_sc_delimiter`
           ) -> Tuple[Env, int, str, str]:
    """Returns completions environment
    
    Returns
    -------
    env : Env    
    pos : int
        Where the completions start. This is set to the start of the word to be completed.
    out_chunk : str
        Word to match. Empty when `env` is `Env.NONE` because the cursor
        is in a string that offers nothing to complete.
    rcomp : str
        How to finish the completion (defaulting to nothing):
        locals: '
        globals (if start with ${): }
        scalars: )
        scalars (if start with `): )'
        strings: " or "' (matching the opening marker)
    """
    rcomp = ""
    
    # Magics: everything after the last % is the word to match.
    lcode = code.lstrip()
    if self.magic_completion(lcode):
        pos = code.rfind("%") + 1
        env = Env.MAGIC
        return env, pos, code[pos:], rcomp
    
    sc_delimiter = ending_sc_delimiter(code, sc_delimiter)
    env = Env.GENERAL   
    
    pos = self._start_of_last_word(code)

    # Inside a comment: report the general environment for the last word
    # without any further analysis.
    if _ends_in_a_comment(code, sc_delimiter):
        return env, pos, code[pos:], rcomp

    last_token_index, last_token_type, last_token_value = _last_token_full_string(code, sc_delimiter)
    
    if last_token_type is String:
        # Give up when `code` plus a trailing space no longer ends in a
        # String token (presumably the literal is already closed), or when
        # no opening quote marker could be identified.
        if (not _ends_in_string_literal(code + " ", sc_delimiter)
            or not last_token_value.startswith(('"', '`"'))):
            # Always return the documented 4-tuple, with an empty word to
            # match, so that callers can unpack it.
            return Env.NONE, len(code)-1, "", rcomp
        if last_token_value.startswith('"'):
            opening_marker_length = 1
            rcomp = "" if r2chars[0:1] == '"' else '"'
        else:
            # Compound double quote: `" ... "'
            opening_marker_length = 2
            rcomp = "" if r2chars[0:2] == "\"'" else "\"'"
        pos = last_token_index + opening_marker_length
        env = Env.STRING
    else:
        # Figure out if this is a local or global; env = 0 (default)
        # will suggest variables in memory.
        cpos = self._start_of_last_chunk(code) # last "chunk" delimited by white space, a double-quote, or =.
        chunk = code[cpos:]
        lfind = chunk.rfind('`')
        gfind = chunk.rfind('$')
        path_chars = any(x in chunk for x in ['/', '\\', '~'])

        if lfind >= 0 and (lfind > gfind):
            pos = cpos + lfind + 1
            env = Env.LOCAL
            rcomp = "" if r2chars[0:1] == "'" else "'"
        elif gfind >= 0 and not path_chars:
            env = Env.GLOBAL
            bfind = chunk.rfind('{')
            if bfind >= 0 and (bfind == gfind+1):
                # ${x* form: complete after the brace and close it.
                pos = cpos + bfind + 1
                rcomp = "" if r2chars[0:1] == "}" else "}"
            else:
                pos = cpos + gfind + 1    
    
    if pos == 0:
        env = Env.NONE # to-do: auto-complete commands here
    else:
        # Figure out if current statement is a matrix or scalar
        # statement. If so, will add them to completions list.
        last_line, first_word = self._last_line_first_word(code, sc_delimiter)
        if first_word:
            equals_present = (last_line.find('=') > 0)
            if re.match(r'^sca(lar|la|l)?$', first_word): #.strip()
                env = Env.SCALAR_VAR if equals_present else Env.SCALAR
            elif re.match(r'^mat(rix|ri|r)?$', first_word): #.strip()
                env = Env.MATRIX_VAR if equals_present else Env.MATRIX

        # Constructs of the form scalar(x<tab> will be filled only
        # with scalars. This can be preceded by = or `=
        if env in [Env.GENERAL, Env.STRING]:
            scalar_f, new_pos, new_rcomp = self._scalar_f_pos_rcomp(code, r2chars)
            if scalar_f:
                env = Env.SCALAR
                pos = new_pos
                rcomp = new_rcomp

    out_chunk = code[pos:]
    return env, pos, out_chunk, rcomp
