"""Lightweight syntax highlighting utilities.

Extracted from legacy `CodeBlock.render` to a standalone module per Section 5.3.
"""
from __future__ import annotations

import re
from typing import Final

from .style.colors import Color

_KW_PY: Final[set[str]] = {
    "def",
    "class",
    "return",
    "if",
    "elif",
    "else",
    "for",
    "while",
    "import",
    "from",
    "as",
    "try",
    "except",
    "with",
    "yield",
    "lambda",
    "pass",
    "raise",
    "True",
    "False",
    "None",
}

_KW_JSON_YAML: Final[set[str]] = {"true", "false", "null"}


def highlight_text(text: str, language: str | None) -> str:
    """Apply simple token-based highlighting and return a colorized string.

    The implementation intentionally avoids catastrophic backtracking by using
    simple token splits and anchored patterns.
    """
    if not language:
        return text

    # Preserve separators in the token stream
    tokens = re.split(r"(\W)", text)
    if language == "python":
        parts: list[str] = []
        for token in tokens:
            if not token:
                continue
            if token in _KW_PY:
                parts.append(Color.get_color("accent") + token + Color.RESET)
            elif token.startswith("#"):
                parts.append(Color.get_color("muted") + token + Color.RESET)
            elif re.fullmatch(r"([\'\"])(?:\\.|(?!\1).)*\1", token):
                parts.append(Color.get_color("green") + token + Color.RESET)
            elif re.fullmatch(r"\d+(?:\.\d+)?", token):
                parts.append(Color.get_color("gold") + token + Color.RESET)
            else:
                parts.append(token)
        return "".join(parts)

    if language in ("json", "yaml"):
        parts = []
        for token in tokens:
            if not token:
                continue
            tk = token.lower()
            if tk in _KW_JSON_YAML or re.fullmatch(r"\d+(?:\.\d+)?", token):
                parts.append(Color.get_color("gold") + token + Color.RESET)
            else:
                parts.append(token)
        return "".join(parts)

    return text
