# apps/aroflo_connector_app/agent/cheap/extractors.py
from __future__ import annotations

import re
from typing import Any, Dict, Optional, Tuple, List
from ..parse_kv import parse_kv_items

# Matches "task #12" / "task#12" (case-insensitive); group 1 is the number.
RE_TASK_INDEX = re.compile(r"\btask\s*#\s*(\d+)\b", re.IGNORECASE)
# Any YYYY-MM-DD shaped token. Only the digit layout is checked, not calendar
# validity. Not referenced by the functions visible in this module.
RE_DATE = re.compile(r"\b(\d{4}-\d{2}-\d{2})\b")
# "due ... between <date> ... and <date>" on a single line.
# Group 1 is the due keyword as typed; groups 2 and 3 are the two dates in the
# order they appear in the text.
RE_DUE_RANGE = re.compile(
    r"\b(due|duedate|due_date)\b.*?\bbetween\b.*?(\d{4}-\d{2}-\d{2}).*?\band\b.*?(\d{4}-\d{2}-\d{2})",
    re.IGNORECASE,
)

# "pagesize=N" or "page size=N"; group 1 is the key spelling, group 2 the number.
RE_PAGESIZE = re.compile(r"\b(page\s*size|pagesize)\s*=\s*(\d+)\b", re.IGNORECASE)
# "page=N"; does not match "pagesize=N" because "=" must directly follow "page"
# (optionally separated by whitespace).
RE_PAGE = re.compile(r"\bpage\s*=\s*(\d+)\b", re.IGNORECASE)

# "status=<word>" where the value is letters/underscores only.
RE_STATUS = re.compile(r"\bstatus\s*=\s*([a-zA-Z_]+)\b", re.IGNORECASE)
# "where=<anything>"; the capture is greedy and anchored with "$", so it takes
# everything after "where=" and therefore has to be the last thing in the text.
RE_WHERE = re.compile(r"\bwhere\s*=\s*(.+)$", re.IGNORECASE)

# "join=<identifier>" where the value is letters, digits or underscores.
RE_JOIN = re.compile(r"\bjoin\s*=\s*([a-zA-Z0-9_]+)\b", re.IGNORECASE)

# Accept simple "key=value" tokens separated by spaces (cheap structured input)
# key=value where value can be:
# - "double quoted with spaces"
# - 'single quoted with spaces'
# - or unquoted token without spaces
# Named groups: "k" is the key, "v" the raw value (quotes still attached).
RE_KV = re.compile(
    r"""(?P<k>[a-zA-Z_][a-zA-Z0-9_]*)=(?P<v>"[^"]*"|'[^']*'|[^\s]+)"""
)


def extract_task_index(text: str) -> Optional[int]:
    """Return the number from a "task #N" mention in *text*, or None.

    Only the first mention is considered. None is also returned when the
    captured digits cannot be converted to an int.
    """
    found = RE_TASK_INDEX.search(text or "")
    digits = found.group(1) if found else None
    if digits is None:
        return None
    try:
        index = int(digits)
    except Exception:
        return None
    return index


def extract_due_date_range(text: str) -> Optional[Tuple[str, str]]:
    """Return the two dates of a "due ... between A and B" phrase, or None.

    The dates are returned as the raw YYYY-MM-DD strings, in the order they
    appear in *text*; no ordering or calendar validation is applied.
    """
    found = RE_DUE_RANGE.search(text or "")
    # Groups 2 and 3 hold the dates; group 1 is the "due" keyword itself.
    return found.group(2, 3) if found else None


def extract_page_and_pagesize(text: str) -> Tuple[Optional[int], Optional[int]]:
    """Return ``(page, pagesize)`` parsed from "page=N" / "pagesize=N" tokens.

    Each element is None when its token is absent or its digits cannot be
    converted to an int.
    """
    haystack = text or ""

    def _int_group(found, group_index: int) -> Optional[int]:
        # Convert one captured group to int, swallowing conversion failures.
        if not found:
            return None
        try:
            return int(found.group(group_index))
        except Exception:
            return None

    # RE_PAGESIZE keeps the key spelling in group 1, so its number is group 2.
    return (
        _int_group(RE_PAGE.search(haystack), 1),
        _int_group(RE_PAGESIZE.search(haystack), 2),
    )


def extract_status(text: str) -> Optional[str]:
    """Return the lower-cased value of a "status=<word>" token, or None."""
    found = RE_STATUS.search(text or "")
    if found is None:
        return None
    raw_value = found.group(1)
    return raw_value.strip().lower()


def extract_where_tail(text: str) -> Optional[str]:
    """Return everything that follows "where=" in *text*, stripped, or None.

    Example of an accepted clause: ``where=and|createdutc|>|2019-01-01``.
    """
    found = RE_WHERE.search(text or "")
    return found.group(1).strip() if found else None


def extract_join(text: str) -> Optional[str]:
    """Return the lower-cased value of a "join=<identifier>" token, or None."""
    found = RE_JOIN.search(text or "")
    if found is None:
        return None
    raw_value = found.group(1)
    return raw_value.strip().lower()


def extract_kv_params(text: str) -> Dict[str, Any]:
    """Collect every ``key=value`` token in *text* into a dict.

    Values may be double-quoted, single-quoted or bare; one pair of matching
    surrounding quotes is removed. The resulting "key=value" strings are handed
    to ``parse_kv_items`` with ``lowercase_keys=True``, and the "page" and
    "pagesize" entries are converted to int when possible (left untouched
    otherwise). Empty or None input yields an empty dict.
    """
    if not text:
        return {}

    def _unquote(raw: str) -> str:
        # Drop one pair of matching quotes wrapped around the whole value.
        if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in "\"'":
            return raw[1:-1]
        return raw

    pairs: List[str] = []
    for found in RE_KV.finditer(text):
        key = (found.group("k") or "").strip()
        if not key:
            continue
        value = _unquote((found.group("v") or "").strip())
        pairs.append(f"{key}={value}")

    params = parse_kv_items(pairs, lowercase_keys=True)

    for numeric_key in ("page", "pagesize"):
        if numeric_key not in params:
            continue
        try:
            params[numeric_key] = int(params[numeric_key])
        except Exception:
            # Best effort: keep whatever parse_kv_items produced.
            continue

    return params



def normalize_join_keyword(join: str) -> str:
    """Map a user-supplied join keyword to its canonical spelling.

    The input is stripped and lower-cased first. Known aliases are translated
    ("photos"/"docs"/"documents" and "po"/"purchase_order"); anything else is
    returned as normalized. None or empty input yields "".
    """
    aliases = {
        "photos": "documentsandphotos",
        "docs": "documentsandphotos",
        "documents": "documentsandphotos",
        "po": "purchaseorders",
        "purchase_order": "purchaseorders",
        "purchaseorders": "purchaseorders",
    }
    keyword = (join or "").strip().lower()
    return aliases.get(keyword, keyword)



# Join keyword -> op_code for the users zone.
_USER_JOIN_OPS: Dict[str, str] = {
    "customfields": "get_users_with_customfields",
    "documentsandphotos": "get_users_with_documentsandphotos",
    "featureaccess": "get_users_with_featureaccess",
    "notes": "get_users_with_notes",
    "permissiongroups": "get_users_with_permissiongroups",
    "trackingcentredefaults": "get_users_with_trackingcentredefaults",
}

# Join keyword -> op_code for the tasks zone. Lookups walk this table in
# order, so a key that extends another key must come first.
_TASK_JOIN_OPS: Dict[str, str] = {
    "assets": "get_tasks_with_assets",
    "assignedhistory": "get_tasks_with_assignedhistory",
    "customfields": "get_tasks_with_customfields",
    "documentsandphotos": "get_tasks_with_documentsandphotos",
    "expense": "get_tasks_with_expense",
    "labour": "get_tasks_with_labour",
    "material": "get_tasks_with_material",
    "notes": "get_tasks_with_notes",
    "purchaseorders": "get_tasks_with_purchaseorders",
    "quote": "get_tasks_with_quote",
    "tasktotals": "get_tasks_with_tasktotals",
    # "locationcustomfields" starts with "location"; it has to be tested first
    # or every prefix / join= / "with ..." lookup would stop at "location".
    "locationcustomfields": "get_tasks_with_locationcustomfields",
    "location": "get_tasks_with_location",
    "project": "get_tasks_with_project",
    "salesperson": "get_tasks_with_salesperson",
    "substatus": "get_tasks_with_substatus",
}


def _has_any(haystack: str, needles: Tuple[str, ...]) -> bool:
    """Return True when at least one needle is a substring of *haystack*."""
    return any(needle in haystack for needle in needles)


def _join_by_prefix(t: str, zone_prefixes: Tuple[str, ...], join_ops: Dict[str, str]) -> Optional[str]:
    """Return the op_code for a CLI-style "<zone>: <join>" prefix, if any."""
    for join_key, op_code in join_ops.items():
        if t.startswith(tuple(f"{prefix} {join_key}" for prefix in zone_prefixes)):
            return op_code
    return None


def _join_by_param(t: str, join_ops: Dict[str, str]) -> Optional[str]:
    """Return the op_code for a "join=<key>" token anywhere in *t*, if any."""
    if "join=" not in t:
        return None
    for join_key, op_code in join_ops.items():
        if f"join={join_key}" in t:
            return op_code
    return None


def _join_by_phrase(t: str, join_ops: Dict[str, str]) -> Optional[str]:
    """Return the op_code for a free-text "with <key>" / "con <key>", if any."""
    for join_key, op_code in join_ops.items():
        if f"with {join_key}" in t or f"con {join_key}" in t:
            return op_code
    return None


def _detect_zone(t: str) -> str:
    """Pick the zone for normalized text *t* ("" when nothing is recognized)."""
    # Hard disambiguation: an explicit "<zone>:" prefix always wins.
    if t.startswith(("tasks:", "task:", "tareas:", "tarea:")):
        return "tasks"
    if t.startswith(("timesheets:", "timesheet:", "ts:", "horas:", "partehoras:", "parte horas:")):
        return "timesheets"
    if t.startswith(("users:", "user:", "usuarios:", "usuario:")):
        return "users"
    if t.startswith(("businessunits:", "businessunit:", "unidades:", "unidad:")):
        return "businessunits"
    if t.startswith(("permissiongroups:", "permissiongroup:", "permisos:", "grupospermisos:", "grupos de permisos:")):
        return "permissiongroups"
    if t.startswith(("userpositions:", "userposition:", "cargos:", "posiciones:", "puestos:")):
        return "userpositions"

    # Soft hints, only consulted when there is no prefix. Several needles carry
    # a leading/trailing space as a cheap word boundary; *t* is stripped, so it
    # is re-padded here to let the first and last word match as well
    # (otherwise "users with notes" would not be recognized as users).
    padded = f" {t} "
    # "userposition(s)" / "user positions" contain " user"; hide them from the
    # users hint so they can reach the userpositions zone.
    users_text = padded.replace("userposition", "").replace("user position", "")

    # Priority without prefix (reporting / typical usage):
    # timesheets > tasks > users > businessunits > permissiongroups > userpositions
    if _has_any(padded, (" timesheet", " timesheets", " parte de horas", " horas ")):
        return "timesheets"
    if _has_any(padded, (" task", " tasks", " tarea", " tareas")):
        return "tasks"
    if _has_any(users_text, (" user", " users", " usuario", " usuarios")):
        return "users"
    if _has_any(t, ("businessunit", "business units", "unidad de negocio", "unidades de negocio")):
        return "businessunits"
    if _has_any(t, ("permissiongroup", "permission groups", "grupo de permisos", "grupos de permisos")):
        return "permissiongroups"
    if _has_any(t, ("userposition", "user positions", "cargo", "cargos", "posición", "posiciones", "puesto", "puestos")):
        return "userpositions"
    return ""


def _businessunits_intent(t: str) -> str:
    """Resolve the op_code inside the businessunits zone (base, archived, joins)."""
    # CLI-style prefixes.
    if t.startswith(("businessunits: archived", "businessunit: archived", "unidades: archivadas", "unidad: archivadas")):
        return "list_archived_businessunits"
    if t.startswith(("businessunits: locations", "businessunit: locations", "unidades: locations", "unidad: locations")):
        return "get_businessunits_with_locations"
    if t.startswith(("businessunits: priorities", "businessunit: priorities", "unidades: priorities", "unidad: priorities")):
        return "get_businessunits_with_priorities"

    # join=...
    if "join=locations" in t:
        return "get_businessunits_with_locations"
    if "join=priorities" in t:
        return "get_businessunits_with_priorities"

    # Semantic fallbacks.
    if _has_any(t, ("archived", "archiv", "archived=true", "archivado", "archivadas", "inactivo", "inactivos")):
        return "list_archived_businessunits"
    if _has_any(t, ("locations", "ubicaciones", "sedes", "locations join")):
        return "get_businessunits_with_locations"
    if _has_any(t, ("priorities", "prioridades", "priorities join")):
        return "get_businessunits_with_priorities"

    return "list_businessunits"


def _users_intent(t: str) -> str:
    """Resolve the op_code inside the users zone (base, writes, joins)."""
    # Joins via "users: notes" style prefix, then via join=...
    op_code = _join_by_prefix(t, ("users:", "user:", "usuarios:", "usuario:"), _USER_JOIN_OPS)
    op_code = op_code or _join_by_param(t, _USER_JOIN_OPS)
    if op_code:
        return op_code

    # Mutations by prefix. The specific "update-*" forms are tested before the
    # plain "update" prefix, which would otherwise swallow them.
    if t.startswith(("users: create", "user: create", "usuarios: crear", "usuario: crear", "users: new", "user: new")):
        return "create_user"
    if t.startswith(("users: update-customfields", "user: update-customfields", "usuarios: update-customfields", "usuario: update-customfields")):
        return "update_user_customfields"
    if t.startswith(("users: update-permissiongroups", "user: update-permissiongroups", "usuarios: update-permissiongroups", "usuario: update-permissiongroups")):
        return "update_user_permissiongroups"
    if t.startswith(("users: update-featureaccess", "user: update-featureaccess", "usuarios: update-featureaccess", "usuario: update-featureaccess")):
        return "update_user_featureaccess"
    if t.startswith(("users: update", "user: update", "usuarios: actualizar", "usuario: actualizar", "usuarios: update", "usuario: update")):
        return "update_user"

    # Base reads by prefix.
    if t.startswith(("users: get", "user: get", "usuarios: ver", "usuario: ver", "usuarios: get", "usuario: get")):
        return "get_user"
    if t.startswith(("users: list", "user: list", "usuarios: listar", "usuario: listar", "usuarios: list", "usuario: list")):
        return "list_users"

    # Semantic fallbacks.
    if _has_any(t, ("customfields-json", "update customfields", "actualizar customfields", "custom fields")):
        return "update_user_customfields"
    if _has_any(t, ("permissiongroups-json", "update permissiongroups", "actualizar permissiongroups", "grupos permisos")):
        return "update_user_permissiongroups"
    if _has_any(t, ("featureaccess-json", "update featureaccess", "actualizar featureaccess", "acceso features")):
        return "update_user_featureaccess"

    if _has_any(t, ("create user", "new user", "add user", "crear usuario", "nuevo usuario", "agregar usuario")):
        return "create_user"
    if _has_any(t, ("update user", "edit user", "actualizar usuario", "editar usuario")):
        return "update_user"
    if "userid" in t or _has_any(t, ("get user", "show user", "user details", "ver usuario", "detalle usuario")):
        return "get_user"

    # Joins without prefix (e.g. "users with notes").
    return _join_by_phrase(t, _USER_JOIN_OPS) or "list_users"


def _timesheets_intent(t: str) -> str:
    """Resolve the op_code inside the timesheets zone (base reads, UI writes)."""
    # UI mutations by prefix.
    if t.startswith(("timesheets: ui-create", "timesheet: ui-create", "ts: ui-create", "horas: ui-create")):
        return "ui_create_timesheet_entries"
    if t.startswith(("timesheets: ui-delete", "timesheet: ui-delete", "ts: ui-delete", "horas: ui-delete")):
        return "ui_delete_timesheet_entries"

    # Reads by prefix.
    if t.startswith(("timesheets: by-user", "timesheet: by-user", "ts: by-user", "horas: by-user")):
        return "get_timesheets_by_user"
    if t.startswith(("timesheets: by-task", "timesheet: by-task", "ts: by-task", "horas: by-task")):
        return "get_timesheets_by_task"
    if t.startswith(("timesheets: by-type", "timesheet: by-type", "ts: by-type", "horas: by-type")):
        return "get_timesheets_by_type"
    if t.startswith(("timesheets: after-workdate", "timesheet: after-workdate", "ts: after-workdate", "horas: after-workdate")):
        return "get_timesheets_after_workdate"
    if t.startswith(("timesheets: get", "timesheet: get", "ts: get", "horas: get")):
        return "get_timesheet"
    if t.startswith(("timesheets: list", "timesheet: list", "ts: list", "horas: list")):
        return "list_timesheets"

    # Fallbacks without prefix.
    if _has_any(t, ("ui-create", "crear por ui", "insertar por ui", "insertar en aroflo ui")):
        return "ui_create_timesheet_entries"
    if _has_any(t, ("ui-delete", "borrar por ui", "eliminar por ui", "delete timesheet ui")):
        return "ui_delete_timesheet_entries"

    if "after-workdate" in t or ("workdate" in t and _has_any(t, ("after", "después", "posterior"))):
        return "get_timesheets_after_workdate"
    if "by-user" in t or ("userid" in t and _has_any(t, ("usuario", "user"))):
        return "get_timesheets_by_user"
    if "by-task" in t or ("taskid" in t and _has_any(t, ("tarea", "task"))):
        return "get_timesheets_by_task"
    # NOTE(review): this needs BOTH "type" and "tipo" in the text; possibly
    # meant as "or". Kept as-is because "type" alone is a very common token.
    if "by-type" in t or ("type" in t and "tipo" in t):
        return "get_timesheets_by_type"

    if "timesheetid" in t or _has_any(t, ("get timesheet", "ver timesheet", "detalle timesheet")):
        return "get_timesheet"

    return "list_timesheets"


def _tasks_intent(t: str) -> str:
    """Resolve the op_code inside the tasks zone (base, writes, joins)."""
    # Joins via "tasks: notes" style prefix, then via join=...
    # NOTE(review): because joins are tested first, "tareas: material" and
    # "tareas: substatus" resolve to the join reads; the same prefixes listed
    # under the writes below are kept but cannot be reached.
    op_code = _join_by_prefix(t, ("tasks:", "task:", "tareas:", "tarea:"), _TASK_JOIN_OPS)
    op_code = op_code or _join_by_param(t, _TASK_JOIN_OPS)
    if op_code:
        return op_code

    # Writes by prefix.
    if t.startswith(("tasks: create", "task: create", "tareas: crear", "tarea: crear", "tasks: new", "task: new")):
        return "create_task"
    # "update-substatus" must be tested before the plain "update" prefix,
    # which would otherwise swallow it.
    if t.startswith(("tasks: update-substatus", "task: update-substatus", "tareas: substatus", "tarea: substatus")):
        return "update_task_substatus"
    if t.startswith(("tasks: update", "task: update", "tareas: actualizar", "tarea: actualizar", "tareas: update", "tarea: update")):
        return "update_task"
    if t.startswith(("tasks: add-note", "task: add-note", "tareas: nota", "tarea: nota", "tasks: note", "task: note")):
        return "insert_task_notes"
    if t.startswith(("tasks: add-material", "task: add-material", "tareas: material", "tarea: material")):
        return "insert_task_adhoc_materials"
    if t.startswith(("tasks: mark-processed", "task: mark-processed", "tareas: mark-processed", "tarea: mark-processed")):
        return "mark_task_linkprocessed"

    # Base reads by prefix.
    if t.startswith(("tasks: due-range", "task: due-range", "tareas: due-range", "tarea: due-range")):
        return "get_tasks_due_for_date_range"
    if t.startswith(("tasks: get", "task: get", "tareas: ver", "tarea: ver", "tareas: get", "tarea: get")):
        return "get_task"
    if t.startswith(("tasks: list", "task: list", "tareas: listar", "tarea: listar", "tareas: list", "tarea: list")):
        return "list_tasks"

    # Semantic fallbacks.
    if _has_any(t, ("due-range", "date range", "rango")) and ("fromdate" in t or "todate" in t):
        return "get_tasks_due_for_date_range"
    # NOTE(review): any text containing "taskid" resolves to get_task before
    # the write fallbacks below are reached (the users zone does the opposite).
    # Left unchanged because it errs towards a read.
    if "taskid" in t or _has_any(t, ("get task", "show task", "task details", "ver tarea", "detalle tarea")):
        return "get_task"

    if _has_any(t, ("create task", "new task", "add task", "crear tarea", "nueva tarea", "agregar tarea")):
        return "create_task"
    if _has_any(t, ("update task", "edit task", "actualizar tarea", "editar tarea")):
        return "update_task"
    if _has_any(t, ("add note", "insert note", "task note", "agregar nota", "insertar nota", "nota tarea")):
        return "insert_task_notes"
    if _has_any(t, ("add material", "insert material", "adhoc material", "agregar material", "insertar material")):
        return "insert_task_adhoc_materials"
    if _has_any(t, ("mark linkprocessed", "linkprocessed", "mark-processed", "marcar procesado")):
        return "mark_task_linkprocessed"

    # A bare "substatus" means the write, unless the text explicitly asks for
    # the join ("with substatus" / "con substatus"), which is a read.
    asks_substatus_join = "with substatus" in t or "con substatus" in t
    if _has_any(t, ("update substatus", "actualizar substatus")) or ("substatus" in t and not asks_substatus_join):
        return "update_task_substatus"

    # Joins without prefix (e.g. "tasks with notes").
    return _join_by_phrase(t, _TASK_JOIN_OPS) or "list_tasks"


def detect_intent(text: str) -> str:
    """Cheap ES/EN intent detection based on prefixes and keywords.

    The text is stripped and lower-cased, a zone is chosen (an explicit
    "<zone>:" prefix wins; otherwise keyword hints with the priority
    timesheets > tasks > users > businessunits > permissiongroups >
    userpositions), and the zone's rules pick the op_code.

    Args:
        text: Free-form or CLI-style user input; None is treated as "".

    Returns:
        One of the op_code strings used by the zones. When nothing is
        recognized the conservative read "list_tasks" is returned.
    """
    t = (text or "").strip().lower()
    zone = _detect_zone(t)

    # Read-only zones expose a single operation.
    if zone == "permissiongroups":
        return "list_permissiongroups"
    if zone == "userpositions":
        return "list_userpositions"

    if zone == "businessunits":
        return _businessunits_intent(t)
    if zone == "users":
        return _users_intent(t)
    if zone == "timesheets":
        return _timesheets_intent(t)
    if zone == "tasks":
        return _tasks_intent(t)

    # Conservative global fallback: a safe read in a common zone.
    return "list_tasks"