"""
Word Cloud — fuzzy intent resolution via weighted word-to-intent mappings.

Instead of rigid regex patterns that require specific word order, this module
builds a weighted word cloud where every word in the input contributes a score
towards one or more intents. The highest-scoring intent wins.

This makes command parsing **order-independent** — "called train.py a python
file create" works just as well as "create a python file called train.py".

Architecture:
    1. WordCloud holds intent→word mappings with weights
    2. EntityExtractor pulls filenames, paths, and values from any position
    3. IntentResolver scores all intents and picks the best match
"""

from __future__ import annotations

import re
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Set, Tuple


@dataclass
class WordMapping:
    """A single word→intent association with a weight.

    Plain record type. The scoring tables visible in this module store
    their data as bare tuples/dicts rather than as instances of this class.
    """
    # The word being associated with an intent.
    word: str
    # Name of the intent the word is associated with.
    intent: str
    # Strength of the association; 1.0 when not specified.
    weight: float = 1.0
    # Role of the word; expected values are "action", "object" or "modifier".
    category: str = "action"  # action, object, modifier


@dataclass
class IntentScore:
    """Scored intent from word cloud resolution.

    This is the value returned by ``IntentResolver.resolve`` for the
    winning intent.
    """
    # Name of the resolved intent.
    intent: str
    # Raw accumulated weight for that intent.
    score: float
    # Lower-cased input words that contributed to ``score``.
    matched_words: List[str] = field(default_factory=list)
    # Value in [0.0, 1.0]; ``IntentResolver.resolve`` sets it to
    # min(score / 3.0, 1.0).
    confidence: float = 0.0
    # Extracted entity strings keyed by kind. ``IntentResolver.resolve``
    # stores the chosen target under "name" plus the first hit (or "") of
    # every list returned by ``EntityExtractor.extract``.
    entities: Dict[str, str] = field(default_factory=dict)


# ═══════════════════════════════════════════════════════════════════════════════
# Word Cloud — the core mapping table
# ═══════════════════════════════════════════════════════════════════════════════

# Action verbs → intent mapping.
#
# Keys are lower-cased input words; each value lists the (intent, weight)
# pairs that the word votes for. IntentResolver.resolve() adds every weight
# to the running score of the matching intent, so one word may support
# several intents at once. Besides verbs, the table also holds nouns
# ("folder", "python", "readme", ...) that identify the kind of object.
# A word that also appears in _OBJECT_BOOSTS contributes from both tables.
_ACTION_WORDS: Dict[str, List[Tuple[str, float]]] = {
    # File creation
    "create":     [("mkdir", 0.5), ("create_file", 0.5), ("create_py", 0.3), ("create_sh", 0.3), ("create_md", 0.3), ("scaffold", 0.3)],
    "make":       [("mkdir", 0.5), ("create_file", 0.5), ("create_py", 0.3), ("create_sh", 0.3), ("create_md", 0.3), ("scaffold", 0.3)],
    "write":      [("create_file", 0.6), ("create_py", 0.4), ("create_sh", 0.4), ("create_md", 0.4)],
    "generate":   [("create_file", 0.5), ("create_py", 0.4), ("create_sh", 0.4), ("create_md", 0.4), ("scaffold", 0.3)],
    "build":      [("scaffold", 0.6), ("create_file", 0.3), ("mkdir", 0.3)],
    "add":        [("create_file", 0.5), ("create_py", 0.3), ("create_md", 0.3), ("append", 0.4)],
    "touch":      [("create_file", 0.8)],
    # NOTE(review): "new" is also listed in _STOP_WORDS, and
    # IntentResolver._tokenize() drops stop words before scoring, so this
    # entry currently never contributes — confirm which table should win.
    "new":        [("create_file", 0.3), ("mkdir", 0.3), ("scaffold", 0.3)],
    # Directory
    "mkdir":      [("mkdir", 1.5)],
    "directory":  [("mkdir", 1.2)],
    "dir":        [("mkdir", 1.2)],
    "folder":     [("mkdir", 1.2)],
    "folders":    [("mkdir", 1.2)],
    # Scaffold
    "scaffold":   [("scaffold", 1.5)],
    "setup":      [("scaffold", 0.8)],
    "init":       [("scaffold", 0.9)],
    "initialize": [("scaffold", 0.9)],
    "bootstrap":  [("scaffold", 0.9)],
    "project":    [("scaffold", 0.8)],
    "package":    [("scaffold", 0.6)],
    "app":        [("scaffold", 0.5)],
    # File types
    "python":     [("create_py", 1.3)],
    "py":         [("create_py", 1.3)],
    "script":     [("create_sh", 0.6), ("create_py", 0.4)],
    "shell":      [("create_sh", 1.3)],
    "bash":       [("create_sh", 1.3)],
    "sh":         [("create_sh", 1.3)],
    "markdown":   [("create_md", 1.3)],
    "md":         [("create_md", 1.3)],
    "doc":        [("create_md", 0.9)],
    "documentation": [("create_md", 0.9)],
    "readme":     [("create_md", 1.0)],
    "module":     [("create_py", 0.8)],
    "modules":    [("create_py", 0.8)],
    # File ops
    "file":       [("create_file", 0.4), ("create_py", 0.2), ("create_sh", 0.2), ("create_md", 0.2)],
    "files":      [("create_file", 0.4)],
    # Read / list
    "list":       [("list_dir", 1.2)],
    "ls":         [("list_dir", 1.5)],
    "show":       [("list_dir", 0.6), ("read_file", 0.6)],
    "display":    [("list_dir", 0.5), ("read_file", 0.5)],
    "read":       [("read_file", 1.2)],
    "cat":        [("read_file", 1.5)],
    "print":      [("read_file", 0.8)],
    "open":       [("read_file", 0.7)],
    "view":       [("read_file", 0.8), ("list_dir", 0.4)],
    # Delete
    "delete":     [("delete_file", 1.3)],
    "remove":     [("delete_file", 1.3)],
    "rm":         [("delete_file", 1.5)],
    # Run
    "run":        [("run_shell", 1.0), ("run_python", 0.5)],
    "execute":    [("run_shell", 1.0), ("run_python", 0.5)],
    "command":    [("run_shell", 0.8)],
    # Append
    "append":     [("append", 1.3)],
    # Tree
    "tree":       [("tree", 1.5), ("list_dir", 0.4)],
    # Exists
    "exists":     [("file_exists", 1.3)],
    "exist":      [("file_exists", 1.0)],
    "check":      [("file_exists", 0.6)],
}

# Object type words that refine intent (boost scores).
#
# Keys are lower-cased input words; each value maps an intent name to the
# extra score that word adds. IntentResolver.resolve() applies these boosts
# in addition to any _ACTION_WORDS entry for the same word, so a word present
# in both tables counts twice towards the shared intent.
_OBJECT_BOOSTS: Dict[str, Dict[str, float]] = {
    # Directory nouns: strong vote for creating, weaker vote for listing.
    "directory":  {"mkdir": 1.0, "list_dir": 0.5},
    "dir":        {"mkdir": 1.0, "list_dir": 0.5},
    "folder":     {"mkdir": 1.0, "list_dir": 0.5},
    # Generic file noun.
    "file":       {"create_file": 0.3},
    # Language / file-type nouns.
    "python":     {"create_py": 1.0},
    "py":         {"create_py": 1.0},
    "shell":      {"create_sh": 1.0},
    "bash":       {"create_sh": 1.0},
    "sh":         {"create_sh": 1.0},
    "script":     {"create_sh": 0.5, "create_py": 0.3},
    "markdown":   {"create_md": 1.0},
    "md":         {"create_md": 1.0},
    "doc":        {"create_md": 0.7},
    "module":     {"create_py": 0.8},
    # Project-level nouns.
    "project":    {"scaffold": 1.0},
    "package":    {"scaffold": 0.7},
}

# Filler words to ignore.
#
# IntentResolver._tokenize() removes these (compared lower-cased) before any
# scoring happens, and EntityExtractor.extract() never reports them as
# fallback names.
# NOTE(review): "new" appears here and as a key in _ACTION_WORDS; because stop
# words are dropped before scoring, the _ACTION_WORDS entry cannot fire —
# confirm which of the two is intended.
_STOP_WORDS: Set[str] = {
    "a", "an", "the", "is", "it", "to", "for", "of", "in", "on", "at",
    "with", "from", "by", "as", "into", "that", "this", "be", "are",
    "was", "were", "been", "being", "have", "has", "had", "do", "does",
    "did", "will", "would", "could", "should", "may", "might", "can",
    "shall", "i", "me", "my", "we", "our", "you", "your", "please",
    "just", "some", "any", "every", "each", "really", "very", "also",
    "too", "so", "now", "here", "there", "up", "out", "about", "then",
    "than", "but", "or", "if", "when", "while", "after", "before",
    # Naming cue words: they introduce an entity but carry no intent.
    "called", "named", "titled", "labeled", "labelled", "new",
    "inside", "within", "under", "over",
}


# ═══════════════════════════════════════════════════════════════════════════════
# Entity Extractor — pull names/paths from any position
# ═══════════════════════════════════════════════════════════════════════════════

# Patterns for extracting entities regardless of position.
# Each entry is (regex source, entity label); the regex exposes its capture
# through a named group (filename / quoted / pathlike).
_ENTITY_PATTERNS = [
    # Explicit filenames (with extension). The (?!\w) after the extension
    # list keeps a short extension from matching the front of a longer
    # suffix: without it "app.tsx" was captured as "app.ts" and
    # "notes.python" as "notes.py".
    (r"['\"]?(?P<filename>[\w./-]+\.(?:py|sh|md|txt|yaml|yml|json|toml|cfg|ini|log|csv|sql|html|css|js|ts|tsx|jsx))(?!\w)['\"]?", "filename"),
    # Quoted strings
    (r"['\"](?P<quoted>[^'\"]+)['\"]", "quoted"),
    # Path-like tokens (one or more "segment/" prefixes plus a final segment)
    (r"(?<!\w)(?P<pathlike>(?:[\w-]+/)+[\w.-]+)(?!\w)", "path"),
]


class EntityExtractor:
    """Extract filenames, paths, and names from text regardless of position.

    Both public methods are static; the class groups the extraction helpers
    together with their pre-compiled patterns.
    """

    # Extensions that mark a token as an explicit filename.
    _EXTENSIONS = (
        "py|sh|md|txt|yaml|yml|json|toml|cfg|ini|log|csv|sql|html|css|js|ts|tsx|jsx"
    )
    # Sentence punctuation tolerated directly after an entity.
    _TRAILING_PUNCT = ".,;:!?"

    _QUOTED_RE = re.compile(r"['\"]([^'\"]+)['\"]")
    # Lookarounds are used instead of consuming the surrounding whitespace:
    # consuming it made the second of two entities separated by a single
    # space unmatchable ("a.py b.py" only yielded "a.py").
    _FILENAME_RE = re.compile(
        r"(?<!\S)(['\"]?)([\w./-]+\.(?:" + _EXTENSIONS + r"))\1"
        r"(?=[.,;:!?]?(?:\s|$))"
    )
    _PATH_RE = re.compile(r"(?<!\S)([\w-]+(?:/[\w.-]+)+)(?!\S)")
    _NAMED_RE = re.compile(r"(?:called|named|titled)\s+['\"]?(\S+)['\"]?", re.I)
    _CAPITALISED_RE = re.compile(r"\b([A-Z][a-z]+(?:[A-Z][a-z]+)+|[A-Z]{2,})\b")
    _WORD_RE = re.compile(r"[\w][\w.-]*")

    @staticmethod
    def extract(text: str) -> Dict[str, List[str]]:
        """
        Pull all entity-like tokens from *text*.

        Returns a dict with the keys ``filenames``, ``paths``, ``names`` and
        ``quoted``; every value is a list in order of appearance:

        * ``quoted``    — contents of single- or double-quoted spans
        * ``filenames`` — whitespace-delimited tokens ending in a known
          extension (optionally quoted, optionally followed by one
          sentence punctuation mark)
        * ``paths``     — whitespace-delimited tokens containing a ``/``
        * ``names``     — words after called/named/titled, CamelCase or
          ALL-CAPS tokens, then any remaining word that is not a known
          action/object/stop word and was not already captured
        """
        cls = EntityExtractor
        entities: Dict[str, List[str]] = {
            "filenames": [m.group(2) for m in cls._FILENAME_RE.finditer(text)],
            "paths": [m.group(1) for m in cls._PATH_RE.finditer(text)],
            "names": [],
            "quoted": [m.group(1) for m in cls._QUOTED_RE.finditer(text)],
        }
        names = entities["names"]

        # Explicit naming cues: "... called foo", "... named 'bar',".
        for m in cls._NAMED_RE.finditer(text):
            # Drop sentence punctuation first so a closing quote hidden
            # behind it ("'foo',") can be stripped as well.
            value = m.group(1).rstrip(cls._TRAILING_PUNCT).strip("'\"")
            if value:  # e.g. called '' — nothing usable was named
                names.append(value)

        # CamelCase or UPPER tokens as potential names.
        names.extend(m.group(1) for m in cls._CAPITALISED_RE.finditer(text))

        # Fallback: unknown words not in action/object/stop lists.
        # These are likely entity names (e.g. "models" in "models folder create").
        known = set(_ACTION_WORDS) | set(_OBJECT_BOOSTS) | set(_STOP_WORDS)
        # Track everything captured so far in a set so the duplicate check
        # is O(1) per word instead of rebuilding a flat list every time.
        seen = {value for values in entities.values() for value in values}
        for word in cls._WORD_RE.findall(text):
            # A sentence-final period is not part of the name ("models.").
            token = word.rstrip(cls._TRAILING_PUNCT)
            if token in seen or token.lower() in known:
                continue
            names.append(token)
            seen.add(token)

        return entities

    @staticmethod
    def best_name(entities: Dict[str, List[str]], intent: str) -> str:
        """Pick the best entity for a given intent.

        *entities* is the mapping produced by :meth:`extract`. Each intent
        family has its own preferred entity kinds; if none of those has a
        hit, the generic order names → filenames → quoted is used. Returns
        ``""`` when nothing was extracted.
        """
        if intent.startswith("create_") or intent in ("read_file", "delete_file", "append"):
            # File-oriented intents: an explicit filename beats everything.
            preferred: Tuple[str, ...] = ("filenames", "quoted", "names")
        elif intent in ("mkdir", "list_dir", "tree"):
            # Directory-oriented intents: prefer something containing "/".
            preferred = ("paths", "names", "quoted")
        elif intent == "scaffold":
            preferred = ("names", "quoted")
        elif intent == "run_shell":
            # Shell commands are only trusted when explicitly quoted.
            preferred = ("quoted",)
        else:
            preferred = ()

        for kind in preferred + ("names", "filenames", "quoted"):
            candidates = entities.get(kind)
            if candidates:
                return candidates[0]
        return ""


# ═══════════════════════════════════════════════════════════════════════════════
# Intent Resolver — score intents from word cloud
# ═══════════════════════════════════════════════════════════════════════════════

class IntentResolver:
    """
    Order-independent intent resolution via weighted word cloud.

    Every word in the input contributes scores to candidate intents.
    The highest-scoring intent wins. Entity extraction happens
    separately and is position-independent.
    """

    def __init__(self) -> None:
        # The module-level tables are referenced, not copied; assign
        # different mappings on the instance to customise a resolver.
        self.action_words = _ACTION_WORDS
        self.object_boosts = _OBJECT_BOOSTS
        self.stop_words = _STOP_WORDS
        self.extractor = EntityExtractor()

    def resolve(self, text: str) -> Optional[IntentScore]:
        """
        Resolve the best intent from *text*.

        Returns None if no actionable intent is found, i.e. no word maps to
        any intent or the best accumulated score is below 1.0. Otherwise
        returns an IntentScore whose ``entities`` holds the chosen target
        under "name" plus the first hit (or "") of every extracted kind.
        """
        scores: Dict[str, float] = defaultdict(float)
        matched: Dict[str, List[str]] = defaultdict(list)

        for word in self._tokenize(text):
            low = word.lower()

            # Action word scoring
            for intent, weight in self.action_words.get(low, ()):
                scores[intent] += weight
                matched[intent].append(low)

            # Object boost scoring — stacks on top of the action weight but
            # lists the word only once among the matches.
            for intent, boost in self.object_boosts.get(low, {}).items():
                scores[intent] += boost
                if low not in matched[intent]:
                    matched[intent].append(low)

        if not scores:
            return None

        # Find best intent (ties go to the intent that was scored first).
        best_intent = max(scores, key=scores.get)
        best_score = scores[best_intent]

        # Minimum threshold — must have at least an action + object
        if best_score < 1.0:
            return None

        # Extract entities only once an intent has actually been accepted.
        entities = self.extractor.extract(text)
        name = self.extractor.best_name(entities, best_intent)

        # Confidence = score scaled by 3.0 and capped at 1.0.
        confidence = min(best_score / 3.0, 1.0)

        return IntentScore(
            intent=best_intent,
            score=best_score,
            matched_words=matched[best_intent],
            confidence=confidence,
            entities={"name": name, **{k: v[0] if v else "" for k, v in entities.items()}},
        )

    def resolve_multi(self, text: str) -> List[IntentScore]:
        """
        Resolve multiple intents from a multi-clause request.

        Splits on ", then" / "and then" / "then" / "also" / "plus",
        semicolons and sentence boundaries, then resolves each clause
        independently. Clauses that resolve to nothing are omitted.
        """
        clauses = re.split(
            r"\s*(?:,\s*(?:and\s+)?then\s+|;\s*|\.\s+(?=[A-Z])|,\s*and\s+then\s+|,\s*then\s+|\band\s+then\b|\bthen\b|\balso\b|\bplus\b)\s*",
            text,
        )

        results = []
        for clause in clauses:
            clause = clause.strip()
            # Only skip empty fragments. A length cut-off would discard
            # two-letter commands such as "ls" that resolve() accepts;
            # non-commands are rejected by resolve() itself.
            if not clause:
                continue
            result = self.resolve(clause)
            if result:
                results.append(result)

        return results

    def _tokenize(self, text: str) -> List[str]:
        """Split text into tokens, filtering stopwords.

        Tokens are runs of word characters, ".", "/" and "-". Trailing
        periods are removed so a sentence-final keyword ("... a folder.")
        still matches the word tables; original casing is preserved.
        """
        tokens: List[str] = []
        for raw in re.findall(r"[\w./-]+", text):
            # Keep tokens made only of dots (e.g. "..") untouched.
            token = raw.rstrip(".") or raw
            if token.lower() not in self.stop_words:
                tokens.append(token)
        return tokens

    def get_word_cloud_summary(self, text: str) -> Dict[str, Any]:
        """
        Generate a word cloud summary for a given input — shows how
        each word contributes to intent scoring.

        Returns a dict with:
          * "input"      — the original text
          * "tokens"     — tokens left after stop-word filtering
          * "cloud"      — one {"word", "intents"} entry per token that
            scores at least one intent; "intents" maps intent → combined
            action + boost weight for that token
          * "resolution" — the result of resolve(text) (may be None)
        """
        words = self._tokenize(text)
        cloud: List[Dict[str, Any]] = []

        for word in words:
            low = word.lower()
            contributions: Dict[str, float] = {}

            for intent, weight in self.action_words.get(low, ()):
                contributions[intent] = contributions.get(intent, 0) + weight
            for intent, boost in self.object_boosts.get(low, {}).items():
                contributions[intent] = contributions.get(intent, 0) + boost

            # Words that score nothing are left out of the cloud.
            if contributions:
                cloud.append({"word": word, "intents": contributions})

        return {
            "input": text,
            "tokens": words,
            "cloud": cloud,
            "resolution": self.resolve(text),
        }
