# ═══════════════════════════════════════════════════════════════════════════════════
# SYNTHOS PRIMITIVE REGISTRY v1.0
# Complete registry of regex primitives with geometric forms and metadata
# ═══════════════════════════════════════════════════════════════════════════════════

import re
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from enum import Enum

class PrimitiveType(Enum):
    """Category tag attached to every SYNTHOS primitive.

    Each member's value is the lowercase string form of its name.
    """
    ATOMIC = "atomic"  # single-character matchers and zero-width anchors
    QUANTIFIER = "quantifier"  # repetition operators (greedy, lazy, possessive, counted)
    GROUP = "group"  # grouping constructs, including inline flags and comments
    LOOKAROUND = "lookaround"  # lookahead/lookbehind assertions and conditionals
    REFERENCE = "reference"  # backreferences, recursion and DEFINE
    CHARACTER_CLASS = "character_class"  # POSIX/Unicode classes and class intersection

@dataclass
class Primitive:
    """Represents a SYNTHOS regex primitive with geometric metadata.

    This is a plain record: no field is validated or normalised on
    construction, and every field is required.
    """
    # Registry key, e.g. "P001"; the registry stores the primitive under it.
    id: str
    # Short upper-case label, e.g. "WORD_CHAR".
    name: str
    # Pattern text for the primitive. In the built-in registry this is
    # sometimes a notation template rather than a compilable pattern
    # (e.g. "x{n,m}" or "(...)").
    regex: str
    # Symbol used to depict the primitive, e.g. "·" or "║".
    geometric_form: str
    # One-line human-readable explanation.
    description: str
    # Category the primitive belongs to.
    primitive_type: PrimitiveType
    # Sample strings associated with the primitive.
    examples: List[str]
    # Free-text rules of the form "<pattern> = <RESULT_NAME>".
    composition_rules: List[str]

class PrimitiveRegistry:
    """Complete registry of SYNTHOS primitives"""
    
    def __init__(self) -> None:
        """Create the registry and fill it with the built-in primitives."""
        # Maps a primitive's id (e.g. "P001") to its Primitive record.
        self.primitives: Dict[str, Primitive] = {}
        self._load_primitives()
    
    def _load_primitives(self):
        """Load all primitive definitions"""
        
        # CLASS: ATOMIC PRIMITIVES
        atomic_primitives = [
            Primitive(
                id="P001",
                name="GLYPH",
                regex=".",
                geometric_form="·",
                description="Any single character",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["a", "1", " ", "$"],
                composition_rules=["· + · = ──", "· + ○ = ●"]
            ),
            Primitive(
                id="P002", 
                name="WORD_CHAR",
                regex="\\w",
                geometric_form="·",
                description="[a-zA-Z0-9_]",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["a", "Z", "5", "_"],
                composition_rules=["\\w+ = SPAN", "\\w\\d = ALPHANUM"]
            ),
            Primitive(
                id="P003",
                name="DIGIT", 
                regex="\\d",
                geometric_form="·",
                description="[0-9]",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["0", "5", "9"],
                composition_rules=["\\d+ = NUMBER", "\\d\\d = DOUBLE_DIGIT"]
            ),
            Primitive(
                id="P004",
                name="WHITESPACE",
                regex="\\s", 
                geometric_form="·",
                description="[ \\t\\n\\r\\f\\v]",
                primitive_type=PrimitiveType.ATOMIC,
                examples=[" ", "\t", "\n"],
                composition_rules=["\\s+ = SPACE_BLOCK", "\\s* = OPTIONAL_SPACE"]
            ),
            Primitive(
                id="P005",
                name="NONWORD",
                regex="\\W",
                geometric_form="·",
                description="[^a-zA-Z0-9_]",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["@", "#", " ", "-"],
                composition_rules=["\\W+ = DELIMITER"]
            ),
            Primitive(
                id="P006",
                name="NONDIGIT",
                regex="\\D",
                geometric_form="·", 
                description="[^0-9]",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["a", "#", " "],
                composition_rules=["\\D+ = NON_NUMERIC"]
            ),
            Primitive(
                id="P007",
                name="NONSPACE",
                regex="\\S",
                geometric_form="·",
                description="[^ \\t\\n\\r\\f\\v]",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["a", "5", "@"],
                composition_rules=["\\S+ = TOKEN"]
            ),
            Primitive(
                id="P008",
                name="BOUNDARY",
                regex="\\b",
                geometric_form="║",
                description="Word boundary (zero-width)",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["", ""],
                composition_rules=["\\b\\w+\\b = WORD_TOKEN"]
            ),
            Primitive(
                id="P009",
                name="NONBOUND",
                regex="\\B",
                geometric_form="║",
                description="Non-word boundary",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["", ""],
                composition_rules=["\\w\\B\\w = INSIDE_WORD"]
            ),
            Primitive(
                id="P010",
                name="LINE_START",
                regex="^",
                geometric_form="║",
                description="Start of line anchor",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["", ""],
                composition_rules=["^\\s* = INDENT_START"]
            ),
            Primitive(
                id="P011",
                name="LINE_END",
                regex="$",
                geometric_form="║",
                description="End of line anchor",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["", ""],
                composition_rules=["\\s*$ = TRAILING_SPACE"]
            ),
            Primitive(
                id="P012",
                name="STR_START",
                regex="\\A",
                geometric_form="║",
                description="Absolute string start",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["", ""],
                composition_rules=["\\A\\w+ = STRING_START_TOKEN"]
            ),
            Primitive(
                id="P013",
                name="STR_END",
                regex="\\Z",
                geometric_form="║",
                description="Absolute string end",
                primitive_type=PrimitiveType.ATOMIC,
                examples=["", ""],
                composition_rules=["\\w+\\Z = STRING_END_TOKEN"]
            )
        ]
        
        # CLASS: QUANTIFIER CONSTRUCTS
        quantifier_primitives = [
            Primitive(
                id="Q001",
                name="OPT",
                regex="x?",
                geometric_form="◇",
                description="Zero or one (greedy)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["", "x"],
                composition_rules=["\\w? = OPTIONAL_WORD"]
            ),
            Primitive(
                id="Q002",
                name="OPT_LAZY",
                regex="x??",
                geometric_form="◇",
                description="Zero or one (lazy)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["", "x"],
                composition_rules=["\\w?? = LAZY_OPTIONAL"]
            ),
            Primitive(
                id="Q003",
                name="STAR",
                regex="x*",
                geometric_form="∞",
                description="Zero or more (greedy)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["", "x", "xx", "xxx"],
                composition_rules=["\\w* = ZERO_OR_MORE_WORDS"]
            ),
            Primitive(
                id="Q004",
                name="STAR_LAZY",
                regex="x*?",
                geometric_form="∞",
                description="Zero or more (lazy)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["", "x", "xx"],
                composition_rules=["\\w*? = LAZY_STAR"]
            ),
            Primitive(
                id="Q005",
                name="STAR_POSS",
                regex="x*+",
                geometric_form="∞",
                description="Zero or more (possessive)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["", "x", "xx"],
                composition_rules=["\\w*+ = POSSESSIVE_STAR"]
            ),
            Primitive(
                id="Q006",
                name="PLUS",
                regex="x+",
                geometric_form="⊕",
                description="One or more (greedy)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["x", "xx", "xxx"],
                composition_rules=["\\w+ = ONE_OR_MORE_WORDS"]
            ),
            Primitive(
                id="Q007",
                name="PLUS_LAZY",
                regex="x+?",
                geometric_form="⊕",
                description="One or more (lazy)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["x", "xx"],
                composition_rules=["\\w+? = LAZY_PLUS"]
            ),
            Primitive(
                id="Q008",
                name="PLUS_POSS",
                regex="x++",
                geometric_form="▶▶",
                description="One or more (possessive)",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["x", "xx"],
                composition_rules=["\\w++ = POSSESSIVE_PLUS"]
            ),
            Primitive(
                id="Q009",
                name="EXACT",
                regex="x{n}",
                geometric_form="[n]",
                description="Exactly n",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["xxx" for n in [3]],
                composition_rules=["\\w{3} = EXACT_THREE"]
            ),
            Primitive(
                id="Q010",
                name="RANGE",
                regex="x{n,m}",
                geometric_form="[n⟷m]",
                description="Between n and m",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["x", "xx", "xxx"],
                composition_rules=["\\w{1,3} = ONE_TO_THREE"]
            ),
            Primitive(
                id="Q011",
                name="ATLEAST",
                regex="x{n,}",
                geometric_form="[n→∞]",
                description="At least n",
                primitive_type=PrimitiveType.QUANTIFIER,
                examples=["xxx", "xxxx"],
                composition_rules=["\\w{3,} = THREE_OR_MORE"]
            )
        ]
        
        # CLASS: GROUP CONSTRUCTS
        group_primitives = [
            Primitive(
                id="G001",
                name="CAPTURE",
                regex="(...)",
                geometric_form="⌈...⌋",
                description="Capturing group",
                primitive_type=PrimitiveType.GROUP,
                examples=["abc", "123"],
                composition_rules=["(\\w+) = CAPTURE_WORDS"]
            ),
            Primitive(
                id="G002",
                name="NON_CAP",
                regex="(?:...)",
                geometric_form="⟨...⟩",
                description="Non-capturing group",
                primitive_type=PrimitiveType.GROUP,
                examples=["abc", "123"],
                composition_rules=["(?:\\w+) = GROUP_WITHOUT_CAPTURE"]
            ),
            Primitive(
                id="G003",
                name="NAMED",
                regex="(?P<n>...)",
                geometric_form="⟦n:...⟧",
                description="Named capturing group",
                primitive_type=PrimitiveType.GROUP,
                examples=["abc", "123"],
                composition_rules=["(?P<word>\\w+) = NAMED_WORD"]
            ),
            Primitive(
                id="G004",
                name="ATOMIC",
                regex="(?>...)",
                geometric_form="⬡",
                description="Atomic group (no backtrack)",
                primitive_type=PrimitiveType.GROUP,
                examples=["abc", "123"],
                composition_rules=["(?>\\w+) = ATOMIC_WORDS"]
            ),
            Primitive(
                id="G005",
                name="BRANCH_RST",
                regex="(?|...)",
                geometric_form="⑂",
                description="Branch reset group",
                primitive_type=PrimitiveType.GROUP,
                examples=["a|b", "c|d"],
                composition_rules=["(?|\\w+|\\d+) = RESET_BRANCH"]
            ),
            Primitive(
                id="G006",
                name="FLAGS",
                regex="(?flags:...)",
                geometric_form="🏁",
                description="Inline flags (ximsu)",
                primitive_type=PrimitiveType.GROUP,
                examples=["abc", "123"],
                composition_rules=["(?x:\\w+) = VERBOSE_MODE"]
            ),
            Primitive(
                id="G007",
                name="COMMENT",
                regex="(?#...)",
                geometric_form="💬",
                description="Inline comment",
                primitive_type=PrimitiveType.GROUP,
                examples=[""],
                composition_rules=["\\w+(?#comment) = COMMENTED_PATTERN"]
            )
        ]
        
        # CLASS: LOOKAROUND CONSTRUCTS
        lookaround_primitives = [
            Primitive(
                id="L001",
                name="LAHEAD_POS",
                regex="(?=...)",
                geometric_form="→[?]",
                description="Positive lookahead",
                primitive_type=PrimitiveType.LOOKAROUND,
                examples=[""],
                composition_rules=["\\w+(?=\\s) = WORD_BEFORE_SPACE"]
            ),
            Primitive(
                id="L002",
                name="LAHEAD_NEG",
                regex="(?!...)",
                geometric_form="→[✗]",
                description="Negative lookahead",
                primitive_type=PrimitiveType.LOOKAROUND,
                examples=[""],
                composition_rules=["\\w+(?!\\w) = WORD_NOT_FOLLOWED"]
            ),
            Primitive(
                id="L003",
                name="LBEHND_POS",
                regex="(?<=...)",
                geometric_form="[?]←",
                description="Positive lookbehind",
                primitive_type=PrimitiveType.LOOKAROUND,
                examples=[""],
                composition_rules=["(?<=\\s)\\w+ = WORD_AFTER_SPACE"]
            ),
            Primitive(
                id="L004",
                name="LBEHND_NEG",
                regex="(?<!...)",
                geometric_form="[✗]←",
                description="Negative lookbehind",
                primitive_type=PrimitiveType.LOOKAROUND,
                examples=[""],
                composition_rules=["(?<!\\w)\\w+ = WORD_NOT_PRECEDED"]
            ),
            Primitive(
                id="L005",
                name="COND_LAHEAD",
                regex="?(?=...)T|F",
                geometric_form="→[?]?T:F",
                description="Conditional on lookahead",
                primitive_type=PrimitiveType.LOOKAROUND,
                examples=[""],
                composition_rules=["(?(?=\\w)\\w|\\d) = WORD_OR_DIGIT"]
            ),
            Primitive(
                id="L006",
                name="COND_CAPGRP",
                regex="?( n )T|F",
                geometric_form="[n]?T:F",
                description="Conditional on capture group n",
                primitive_type=PrimitiveType.LOOKAROUND,
                examples=[""],
                composition_rules=["(?(1)\\w|\\d) = IF_GROUP1_THEN_WORD"]
            )
        ]
        
        # CLASS: REFERENCE CONSTRUCTS
        reference_primitives = [
            Primitive(
                id="R001",
                name="BACKREF_N",
                regex="\\1 ... \\99",
                geometric_form="↩",
                description="Numbered backreference",
                primitive_type=PrimitiveType.REFERENCE,
                examples=[""],
                composition_rules=["(\\w+)\\s+\\1 = REPEATED_WORD"]
            ),
            Primitive(
                id="R002",
                name="BACKREF_NM",
                regex="\\k<name>",
                geometric_form="↩",
                description="Named backreference",
                primitive_type=PrimitiveType.REFERENCE,
                examples=[""],
                composition_rules=["(?P<word>\\w+)\\s+\\k<word> = REPEATED_NAMED"]
            ),
            Primitive(
                id="R003",
                name="RECURSE_0",
                regex="(?R)",
                geometric_form="🔄",
                description="Recurse entire pattern",
                primitive_type=PrimitiveType.REFERENCE,
                examples=[""],
                composition_rules=["\\(([^()]|(?R))*\\) = BALANCED_PARENTHESES"]
            ),
            Primitive(
                id="R004",
                name="RECURSE_N",
                regex="(?1)",
                geometric_form="🔄",
                description="Recurse group N",
                primitive_type=PrimitiveType.REFERENCE,
                examples=[""],
                composition_rules=["(?P<paren>\\(([^()]|(?&paren))*\\)) = RECURSIVE_PARENTHESES"]
            ),
            Primitive(
                id="R005",
                name="RECURSE_NM",
                regex="(?&name)",
                geometric_form="🔄",
                description="Recurse named group",
                primitive_type=PrimitiveType.REFERENCE,
                examples=[""],
                composition_rules=["(?P<balanced>\\(([^()]|(?&balanced))*\\)) = NAMED_RECURSION"]
            ),
            Primitive(
                id="R006",
                name="DEFINE",
                regex="?(DEFINE)",
                geometric_form="📝",
                description="Define subroutine (no-match)",
                primitive_type=PrimitiveType.REFERENCE,
                examples=[""],
                composition_rules=["(?(DEFINE)(?P<sub>\\w+)) = DEFINE_SUBROUTINE"]
            )
        ]
        
        # CLASS: CHARACTER CLASS CONSTRUCTS
        character_class_primitives = [
            Primitive(
                id="C001",
                name="POSIX_ALPHA",
                regex="[[:alpha:]]",
                geometric_form="○",
                description="POSIX alpha class",
                primitive_type=PrimitiveType.CHARACTER_CLASS,
                examples=["a", "Z"],
                composition_rules=["[[:alpha:]]+ = ALPHA_SEQUENCE"]
            ),
            Primitive(
                id="C002",
                name="POSIX_DIGIT",
                regex="[[:digit:]]",
                geometric_form="○",
                description="POSIX digit class",
                primitive_type=PrimitiveType.CHARACTER_CLASS,
                examples=["0", "5"],
                composition_rules=["[[:digit:]]+ = DIGIT_SEQUENCE"]
            ),
            Primitive(
                id="C003",
                name="UNICODE_CAT",
                regex="\\p{Letter}",
                geometric_form="○",
                description="Unicode category",
                primitive_type=PrimitiveType.CHARACTER_CLASS,
                examples=["a", "Z", "é"],
                composition_rules=["\\p{Letter}+ = LETTER_SEQUENCE"]
            ),
            Primitive(
                id="C004",
                name="NEG_UNICODE",
                regex="\\P{Letter}",
                geometric_form="⊘",
                description="Negated Unicode category",
                primitive_type=PrimitiveType.CHARACTER_CLASS,
                examples=["1", "@", " "],
                composition_rules=["\\P{Letter}+ = NON_LETTER_SEQUENCE"]
            ),
            Primitive(
                id="C005",
                name="INTERSECTION",
                regex="[a-z&&[^aeiou]]",
                geometric_form="∩",
                description="Character class intersection",
                primitive_type=PrimitiveType.CHARACTER_CLASS,
                examples=["b", "c", "d"],
                composition_rules=["[a-z&&[^aeiou]] = CONSONANTS"]
            )
        ]
        
        # Load all primitives into registry
        for primitive_list in [
            atomic_primitives, quantifier_primitives, group_primitives,
            lookaround_primitives, reference_primitives, character_class_primitives
        ]:
            for primitive in primitive_list:
                self.primitives[primitive.id] = primitive
    
    def get_primitive(self, primitive_id: str) -> Optional[Primitive]:
        """Get primitive by ID"""
        return self.primitives.get(primitive_id)
    
    def get_primitives_by_type(self, primitive_type: PrimitiveType) -> List[Primitive]:
        """Get all primitives of a specific type"""
        return [p for p in self.primitives.values() if p.primitive_type == primitive_type]
    
    def search_primitives(self, query: str) -> List[Primitive]:
        """Search primitives by name, description, or regex"""
        query_lower = query.lower()
        results = []
        for primitive in self.primitives.values():
            if (query_lower in primitive.name.lower() or 
                query_lower in primitive.description.lower() or
                query_lower in primitive.regex.lower()):
                results.append(primitive)
        return results
    
    def get_geometric_forms(self) -> Dict[str, List[str]]:
        """Get mapping of geometric forms to primitive IDs"""
        forms = {}
        for primitive in self.primitives.values():
            form = primitive.geometric_form
            if form not in forms:
                forms[form] = []
            forms[form].append(primitive.id)
        return forms
    
    def validate_regex_syntax(self, pattern: str) -> Tuple[bool, Optional[str]]:
        """Validate if a regex pattern is syntactically correct"""
        try:
            re.compile(pattern)
            return True, None
        except re.error as e:
            return False, str(e)
    
    def compose_primitives(self, primitive_ids: List[str]) -> Optional[str]:
        """Compose multiple primitives into a regex pattern"""
        patterns = []
        for pid in primitive_ids:
            primitive = self.get_primitive(pid)
            if not primitive:
                return None
            patterns.append(primitive.regex)
        
        combined_pattern = "".join(patterns)
        is_valid, error = self.validate_regex_syntax(combined_pattern)
        if is_valid:
            return combined_pattern
        else:
            return None

# Example usage and demonstration
def _run_demo() -> None:
    """Print a short tour of the registry: lookup, filtering, search,
    geometric-form grouping and composition."""
    registry = PrimitiveRegistry()

    # Gather everything first; the registry calls below only read state.
    atom = registry.get_primitive("P001")
    atomic_prims = registry.get_primitives_by_type(PrimitiveType.ATOMIC)
    search_results = registry.search_primitives("word")
    forms = registry.get_geometric_forms()
    composition = registry.compose_primitives(["P002", "Q006"])

    # Report in the same order the values were gathered.
    print("=== SYNTHOS PRIMITIVE REGISTRY DEMO ===")
    print(f"\nPrimitive P001: {atom.name} -> {atom.regex} [{atom.geometric_form}]")
    print(f"\nFound {len(atomic_prims)} atomic primitives")
    print(f"\nSearch results for 'word': {[p.name for p in search_results]}")
    print(f"\nGeometric forms: {forms}")
    print(f"\nComposed pattern: {composition}")
    print("\n=== PRIMITIVE REGISTRY LOADED ===")


if __name__ == "__main__":
    _run_demo()
