# ═══════════════════════════════════════════════════════════════════════════════════
# SYNTHOS RECURSIVE GRAMMAR ENGINE (RGE) v1.0
# EBNF productions encoded in recursive regex with state machine parsing
# ═══════════════════════════════════════════════════════════════════════════════════

import re
from typing import Dict, List, Tuple, Optional, Set, Any, Callable
from dataclasses import dataclass, field
from enum import Enum
from collections import defaultdict
try:
    import graphviz
except ImportError:
    graphviz = None

class ProductionType(Enum):
    """Category tag stored on every ``ProductionRule``.

    ``export_grammar`` serialises the string value of the member, and
    ``visualize_grammar`` maps each member to a node colour.
    """
    # Leaf token rules; the built-in grammar uses this for LITERAL, SYMBOL and NUMBER.
    TERMINAL = "terminal"
    # Composite rules; the built-in grammar uses this for SYNTHOS_ROOT and STATEMENT.
    NONTERMINAL = "nonterminal"
    # The built-in grammar uses this for EXPRESSION and GEOMETRY.
    RECURSIVE = "recursive"
    # The next three are not assigned to any rule in this file; they only
    # appear in the colour table of visualize_grammar. Presumably they mirror
    # the EBNF constructs [A], {A} and A | B -- confirm before relying on it.
    OPTION = "option"
    REPETITION = "repetition"
    ALTERNATION = "alternation"

@dataclass
class ProductionRule:
    """One EBNF production together with its hand-written regex encoding.

    The only behaviour is dependency discovery, which runs once from
    ``__post_init__`` and scans ``regex_pattern`` for subroutine-call syntax.
    """
    # Name of the production; the engine registers the rule under this key.
    rule_name: str
    # Human-readable EBNF text. Stored and exported, never interpreted here.
    ebnf_pattern: str
    # Regular expression that the engine compiles when parsing with this rule.
    regex_pattern: str
    production_type: ProductionType
    # Free-form diagram string shown by the demo and written by the exporter.
    geometric_form: str
    # Names of other rules this rule refers to. Callers may pre-seed it;
    # discovered references are added on top.
    dependencies: Set[str] = field(default_factory=set)

    def __post_init__(self):
        # Work on a private copy: the caller may have passed a set that is
        # shared with other rules, and discovery below adds to it in place.
        self.dependencies = set(self.dependencies)
        self._extract_dependencies()

    def _extract_dependencies(self):
        """Add every rule referenced from ``regex_pattern`` to ``dependencies``.

        Two spellings are recognised: named calls ``(?&NAME)`` and numbered
        calls ``(?1)``. A numbered call is recorded as its digit string. A
        reference to the rule's own name is skipped, so auto-discovery never
        records a self-dependency.

        Note that both spellings are PCRE-style extensions; Python's built-in
        ``re`` module rejects them when the pattern is compiled.
        """
        references = re.findall(r'\(\?&(\w+)\)|\(\?(\d+)\)', self.regex_pattern)
        for named, numbered in references:
            # Exactly one of the two alternatives captured something.
            dep = named or numbered
            if dep != self.rule_name:
                self.dependencies.add(dep)

@dataclass
class ParseState:
    """A single entry in the per-rule state list kept by the grammar engine."""
    # Unique label of the state.
    state_id: str
    # Production this state belongs to.
    rule_name: str
    # Ordinal of the state within its rule's list.
    position: int
    stack_depth: int
    matched_text: str = ""
    # True only for a rule's final state.
    is_accepting: bool = False

    def __str__(self):
        # Compact form: State(<id>:<rule>@<position>)
        return "State({}:{}@{})".format(self.state_id, self.rule_name, self.position)

@dataclass
class ParseTree:
    """Node of a parse tree: the rule that matched, the text, and sub-nodes."""
    rule_name: str
    matched_text: str
    children: List['ParseTree'] = field(default_factory=list)
    # Offsets of the match as reported by the regex match object.
    start_pos: int = 0
    end_pos: int = 0

    def add_child(self, child: 'ParseTree'):
        """Append ``child`` as the last sub-node."""
        self.children.append(child)

    def to_dict(self) -> Dict[str, Any]:
        """Return a nested plain-dict view of this node and its descendants."""
        nested = []
        for node in self.children:
            nested.append(node.to_dict())
        return {
            "rule": self.rule_name,
            "text": self.matched_text,
            "span": (self.start_pos, self.end_pos),
            "children": nested,
        }

    def print_tree(self, indent: int = 0):
        """Print this node, then each child one level (two spaces) deeper."""
        padding = "  " * indent
        print(padding + f"{self.rule_name}: '{self.matched_text}'")
        deeper = indent + 1
        for node in self.children:
            node.print_tree(deeper)

class RecursiveGrammarEngine:
    """EBNF grammar engine with recursive regex parsing"""
    
    def __init__(self):
        """Create an engine and load the built-in SYNTHOS grammar into it."""
        # Registries; all start empty and are filled by the grammar loader.
        self.production_rules: Dict[str, ProductionRule] = {}
        self.parse_table: Dict[str, Dict[str, str]] = {}
        self.state_machine: Dict[str, List[ParseState]] = {}
        self.parse_cache: Dict[str, ParseTree] = {}

        # Reference table pairing each EBNF notation with its regex shape.
        # Nothing else in this file reads it.
        self.ebnf_translations = dict((
            ('rule = A B C', r'(?P<rule>A B C)'),       # sequence
            ('rule = A | B', r'(?P<rule>A|B)'),         # alternation
            ('rule = [A]', r'(?P<rule>A?)'),            # option
            ('rule = {A}', r'(?P<rule>A*)'),            # repetition
            ('rule = (A)', r'(?P<rule>(?:A))'),         # grouping
            ('rule = A - B', r'(?P<rule>(?!B)A)'),      # exception
        ))

        # Populate production_rules and state_machine.
        self._load_synthos_grammar()
    
    def _load_synthos_grammar(self):
        """Register the built-in SYNTHOS productions, then build the state machine.

        Registration order is SYNTHOS_ROOT, STATEMENT, EXPRESSION, GEOMETRY,
        LITERAL, SYMBOL, NUMBER.

        Python's ``re`` module has no subroutine calls, so SYNTHOS_ROOT embeds
        the STATEMENT pattern textually: its BODY is zero or more statements,
        each terminated by a newline. One blank line before the closing tag is
        accepted but not required.
        """
        # A single statement. CONDITION runs to the end of the line; a lazy
        # ``.+?`` in final position would capture only one character.
        statement_regex = (
            r'(?P<STATEMENT>'
            r'(?P<DIRECTIVE>DEFINE|ROUTE|BIND|EMIT|GATE|ASSERT|QUERY)'
            r'\s+(?P<SUBJECT>\w+)'
            r'\s+(?P<PREDICATE>(?:AS|TO|WITH|FROM|INTO|WHEN)\s+\S+)'
            r'(?P<CLAUSE>(?:\s+IF\s+(?P<CONDITION>[^\n]+))?)'
            r')'
        )

        # Whole document: preamble line, statement lines, closing tag.
        root_regex = "".join((
            r'(?P<SYNTHOS_ROOT>',
            r'(?P<PREAMBLE>\[\[SYNTHOS\]\]\s+v(?P<VERSION>\d+\.\d+)\s+@(?P<SESSION_ID>[A-F0-9]{8}))',
            r'\n(?P<BODY>(?:', statement_regex, r'\n)*)',
            r'\n?(?P<EPILOGUE>\[\[\/SYNTHOS\]\])',
            r')',
        ))

        # COMPOUND stops at the first ')' and therefore truncates nested
        # compounds; ``re`` cannot express the balanced form.
        expression_regex = r'(?P<EXPRESSION>(?P<LITERAL>"[^"]*"|\'[^\']*\')|(?P<SYMBOL>[A-Z_][A-Z0-9_]*)|(?P<NUMBER>-?\d+(?:\.\d+)?)|(?P<COMPOUND>\(\s*(?P<OP>AND|OR|NOT|XOR|IMPLIES|IFF)\s+.+?\s*\)))'

        # LINE is tried before POINT: alternation is ordered, and a line
        # starts with text that POINT alone would already accept.
        geometry_regex = r'(?P<GEOMETRY>(?P<LINE>(?P<GP1>\(\S+,\S+\))--(?P<GP2>\(\S+,\S+\)))|(?P<POINT>\((?P<GX>-?\d+),(?P<GY>-?\d+)\))|(?P<POLYGON>\[\s*(?:\(\S+,\S+\)\s*,?\s*)+\s*\])|(?P<LATTICE>\[\[\s*(?:\[.+?\]\s*;?\s*)+\s*\]\]))'

        builtin_rules = (
            ProductionRule(
                rule_name="SYNTHOS_ROOT",
                ebnf_pattern="PREAMBLE BODY EPILOGUE",
                regex_pattern=root_regex,
                production_type=ProductionType.NONTERMINAL,
                geometric_form="──[PREAMBLE]──[BODY]──[EPILOGUE]──",
                # Declared by hand: the statement pattern is spliced in as
                # text, so there is no call syntax for discovery to find.
                dependencies={"STATEMENT"},
            ),
            ProductionRule(
                rule_name="STATEMENT",
                ebnf_pattern="DIRECTIVE SUBJECT PREDICATE [CLAUSE]",
                regex_pattern=statement_regex,
                production_type=ProductionType.NONTERMINAL,
                geometric_form="──[DIRECTIVE]──[SUBJECT]──[PREDICATE]──[CLAUSE?]──",
            ),
            ProductionRule(
                rule_name="EXPRESSION",
                ebnf_pattern="LITERAL | SYMBOL | NUMBER | COMPOUND",
                regex_pattern=expression_regex,
                production_type=ProductionType.RECURSIVE,
                geometric_form="──[LITERAL|SYMBOL|NUMBER|COMPOUND]──",
            ),
            ProductionRule(
                rule_name="GEOMETRY",
                ebnf_pattern="POINT | LINE | POLYGON | LATTICE",
                regex_pattern=geometry_regex,
                production_type=ProductionType.RECURSIVE,
                geometric_form="──[POINT|LINE|POLYGON|LATTICE]──",
            ),
            # Supporting terminal productions.
            ProductionRule(
                rule_name="LITERAL",
                ebnf_pattern='"text" | \'text\'',
                regex_pattern=r'"[^"]*"|\'[^\']*\'',
                production_type=ProductionType.TERMINAL,
                geometric_form="──[STRING]──",
            ),
            ProductionRule(
                rule_name="SYMBOL",
                ebnf_pattern="[A-Z_][A-Z0-9_]*",
                regex_pattern=r'[A-Z_][A-Z0-9_]*',
                production_type=ProductionType.TERMINAL,
                geometric_form="──[IDENTIFIER]──",
            ),
            ProductionRule(
                rule_name="NUMBER",
                ebnf_pattern="-?digits[.digits]?",
                regex_pattern=r'-?\d+(?:\.\d+)?',
                production_type=ProductionType.TERMINAL,
                geometric_form="──[NUMERIC]──",
            ),
        )
        for rule in builtin_rules:
            self.add_production_rule(rule)

        # Derive the per-rule state lists from what was just registered.
        self._build_state_machine()
    
    def add_production_rule(self, rule: ProductionRule):
        """Add a production rule to the grammar"""
        self.production_rules[rule.rule_name] = rule
    
    def translate_ebnf_to_regex(self, ebnf_pattern: str) -> str:
        """Translate EBNF pattern to regex"""
        # Simple translation rules
        translations = {
            r'(\w+)\s*=\s*([^|]+)\s+([^|]+)': lambda m: f'(?P<{m.group(1)}>{m.group(2)}\\s*{m.group(3)})',
            r'(\w+)\s*=\s*([^|]+)\s*\|\s*([^|]+)': lambda m: f'(?P<{m.group(1)}>{m.group(2)}|{m.group(3)})',
            r'(\w+)\s*=\s*\[([^\]]+)\]': lambda m: f'(?P<{m.group(1)}>{m.group(2)}?)',
            r'(\w+)\s*=\s*\{([^\}]+)\}': lambda m: f'(?P<{m.group(1)}>{m.group(2)}*)',
            r'(\w+)\s*=\s*\(([^)]+)\)': lambda m: f'(?P<{m.group(1)}>(?:{m.group(2)}))',
        }
        
        regex_pattern = ebnf_pattern
        for pattern, replacement in translations.items():
            regex_pattern = re.sub(pattern, replacement, regex_pattern)
        
        return regex_pattern
    
    def _build_state_machine(self):
        """Give every registered rule the same three-state list.

        The states are START, PROCESSING and ACCEPT, stored under the rule's
        name in ``self.state_machine``; an existing entry for that name is
        overwritten.
        """
        # (state-id suffix, position, stack depth, accepting?)
        layout = (
            ("START", 0, 0, False),
            ("PROCESSING", 1, 1, False),
            ("ACCEPT", 2, 0, True),
        )
        for name in self.production_rules:
            self.state_machine[name] = [
                ParseState(
                    state_id=f"{name}_{suffix}",
                    rule_name=name,
                    position=ordinal,
                    stack_depth=depth,
                    is_accepting=accepting,
                )
                for suffix, ordinal, depth, accepting in layout
            ]
    
    def parse(self, input_text: str, start_rule: str = "SYNTHOS_ROOT") -> Optional[ParseTree]:
        """Parse input text using the grammar"""
        cache_key = f"{start_rule}:{hash(input_text)}"
        if cache_key in self.parse_cache:
            return self.parse_cache[cache_key]
        
        if start_rule not in self.production_rules:
            raise ValueError(f"Unknown start rule: {start_rule}")
        
        rule = self.production_rules[start_rule]
        
        try:
            # Compile regex with recursive support
            pattern = re.compile(rule.regex_pattern, re.DOTALL | re.VERBOSE)
            match = pattern.search(input_text)
            
            if match:
                # Build parse tree
                tree = self._build_parse_tree(match, start_rule, input_text)
                self.parse_cache[cache_key] = tree
                return tree
            else:
                return None
                
        except re.error as e:
            raise ValueError(f"Regex compilation error for rule {start_rule}: {e}")
    
    def _build_parse_tree(self, match: re.Match, rule_name: str, full_text: str,
                          _depth: int = 0) -> ParseTree:
        """Turn a regex match into a tree node and attach child nodes.

        Every non-empty named capture whose name is itself a registered rule
        (other than ``rule_name``) is re-parsed with that rule via
        ``self.parse`` and, when that succeeds, appended as a child. Child
        spans are therefore relative to the captured substring, not to the
        text the parent was matched in.

        ``full_text`` is accepted but not used.

        NOTE(review): ``_depth`` is never raised above its default on the
        current call path -- children go through ``self.parse``, which calls
        this method without a depth -- so the cut-off below does not bound
        recursion as written.
        """
        node = ParseTree(
            rule_name=rule_name,
            matched_text=match.group(0),
            start_pos=match.start(),
            end_pos=match.end(),
        )

        # Depth cut-off: beyond it the node is returned without children.
        if _depth <= 8:
            for name, captured in match.groupdict().items():
                if not captured or name == rule_name or name not in self.production_rules:
                    continue
                subtree = self.parse(captured, name)
                if subtree:
                    node.add_child(subtree)

        return node
    
    def validate_grammar(self) -> Dict[str, Any]:
        """Check the registered rules for dangling references and self-recursion.

        Returns:
            A dict with four keys:

            * ``"valid"`` -- ``False`` when at least one error was recorded.
            * ``"errors"`` -- one message per dependency naming a rule that is
              not registered.
            * ``"warnings"`` -- one message per rule that refers to itself.
            * ``"statistics"`` -- rule counts and the largest dependency set.
        """
        errors: List[str] = []
        warnings: List[str] = []
        known = set(self.production_rules)

        # Undefined dependencies.
        for rule_name, rule in self.production_rules.items():
            for dep in rule.dependencies:
                # Digit-only entries come from numbered calls such as (?1);
                # they point at a capture group of the same pattern, not at
                # another rule, so they cannot be "undefined rules".
                if dep in known or dep.isdigit():
                    continue
                errors.append(
                    f"Rule '{rule_name}' depends on undefined rule '{dep}'"
                )

        # Self-recursion (simplified). Dependency discovery drops references
        # to the rule's own name, so the pattern is inspected directly as well
        # as the (possibly caller-supplied) dependency set.
        for rule_name, rule in self.production_rules.items():
            self_call = f"(?&{rule_name})"
            if rule_name in rule.dependencies or self_call in rule.regex_pattern:
                warnings.append(
                    f"Rule '{rule_name}' may be left-recursive"
                )

        rules = list(self.production_rules.values())
        statistics = {
            "total_rules": len(rules),
            "terminal_rules": sum(
                1 for r in rules if r.production_type == ProductionType.TERMINAL
            ),
            "recursive_rules": sum(
                1 for r in rules if r.production_type == ProductionType.RECURSIVE
            ),
            # default=0 covers an empty grammar.
            "max_dependencies": max((len(r.dependencies) for r in rules), default=0),
        }

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
            "statistics": statistics,
        }
    
    def generate_language_sample(self, start_rule: str = "SYNTHOS_ROOT", max_depth: int = 5) -> str:
        """Produce one hard-coded example string for ``start_rule``.

        Args:
            start_rule: Name of the production to expand.
            max_depth: Expansion depth beyond which ``"..."`` is emitted.

        Returns:
            The sample text, or ``""`` when ``start_rule`` is not registered.
            Rules without a dedicated sample expand to ``"[<rule name>]"``.
        """
        if start_rule not in self.production_rules:
            return ""

        terminal_samples = {
            "LITERAL": '"sample_text"',
            "SYMBOL": "SAMPLE_SYMBOL",
            "NUMBER": "42",
        }

        def expand_rule(rule_name: str, depth: int) -> str:
            if depth > max_depth:
                return "..."

            # A rule referenced by a sample but missing from the grammar gets
            # the generic placeholder instead of raising KeyError.
            rule = self.production_rules.get(rule_name)
            if rule is None:
                return f"[{rule_name}]"

            kind = rule.production_type

            if kind == ProductionType.TERMINAL:
                return terminal_samples.get(rule.rule_name, "terminal")

            if kind == ProductionType.NONTERMINAL:
                if rule_name == "SYNTHOS_ROOT":
                    return f"[[SYNTHOS]] v1.0 @ABCDEF12\n{expand_rule('STATEMENT', depth+1)}\n[[/SYNTHOS]]"
                if rule_name == "STATEMENT":
                    return f"DEFINE {expand_rule('SYMBOL', depth+1)} AS {expand_rule('EXPRESSION', depth+1)}"

            elif kind == ProductionType.RECURSIVE:
                if rule_name == "EXPRESSION":
                    return expand_rule("SYMBOL", depth+1)
                if rule_name == "GEOMETRY":
                    # POINT is only a capture group inside the GEOMETRY
                    # pattern, not a registered rule, so emit a concrete
                    # point; it counts as one more level of expansion.
                    return "..." if depth + 1 > max_depth else "(0,0)"

            return f"[{rule_name}]"

        return expand_rule(start_rule, 0)
    
    def visualize_grammar(self, output_file: str = "grammar_graph.png"):
        """Render the rule dependency graph to a PNG image.

        Each rule becomes a node coloured by its production type; each
        dependency becomes an edge from the rule to the rule it refers to.

        Args:
            output_file: Path of the image to write. A trailing ``.png`` is
                optional; the renderer adds the extension itself.

        When the ``graphviz`` package is not installed, a message is printed
        and nothing is rendered.
        """
        # The module-level import falls back to ``graphviz = None``, so the
        # missing package shows up here as None, not as an ImportError.
        if graphviz is None:
            print("Graphviz not available for visualization")
            return

        node_colors = {
            ProductionType.TERMINAL: 'lightgreen',
            ProductionType.NONTERMINAL: 'lightblue',
            ProductionType.RECURSIVE: 'lightcoral',
            ProductionType.OPTION: 'lightyellow',
            ProductionType.REPETITION: 'lightpink',
            ProductionType.ALTERNATION: 'lightgray',
        }

        dot = graphviz.Digraph(comment='SYNTHOS Grammar', format='png')

        # Nodes
        for rule_name, rule in self.production_rules.items():
            fill = node_colors.get(rule.production_type, 'white')
            dot.node(rule_name, rule_name, fillcolor=fill, style='filled')

        # Edges
        for rule_name, rule in self.production_rules.items():
            for dep in rule.dependencies:
                dot.edge(rule_name, dep)

        # render() treats its argument as the name of the DOT source file and
        # appends ".png" to it for the image, so the extension is stripped
        # first; otherwise "x.png" would come out as "x.png.png".
        stem = output_file
        if len(output_file) > 4 and output_file.lower().endswith('.png'):
            stem = output_file[:-4]

        rendered_path = dot.render(stem, cleanup=True)
        print(f"Grammar visualization saved to {rendered_path}")
    
    def export_grammar(self, filename: str):
        """Write the rules, a validation report and the state lists as JSON.

        Args:
            filename: Path of the file to create or overwrite.
        """
        import json

        def rule_record(rule):
            # Flatten one rule to JSON-friendly values.
            return {
                "rule_name": rule.rule_name,
                "ebnf_pattern": rule.ebnf_pattern,
                "regex_pattern": rule.regex_pattern,
                "production_type": rule.production_type.value,
                "geometric_form": rule.geometric_form,
                "dependencies": list(rule.dependencies),
            }

        def state_record(state):
            # Flatten one state; matched_text is not exported.
            return {
                "state_id": state.state_id,
                "rule_name": state.rule_name,
                "position": state.position,
                "stack_depth": state.stack_depth,
                "is_accepting": state.is_accepting,
            }

        rules_section = {}
        for name, rule in self.production_rules.items():
            rules_section[name] = rule_record(rule)

        validation_section = self.validate_grammar()

        machine_section = {}
        for name, states in self.state_machine.items():
            machine_section[name] = list(map(state_record, states))

        payload = {
            "production_rules": rules_section,
            "validation": validation_section,
            "state_machine": machine_section,
        }

        with open(filename, 'w') as f:
            json.dump(payload, f, indent=2)

        print(f"Grammar exported to {filename}")

# Example usage and demonstration
if __name__ == "__main__":
    # Scripted walk-through of the engine: list rules, validate, generate a
    # sample, parse a few inputs, then visualise and export the grammar.
    print("=== SYNTHOS RECURSIVE GRAMMAR ENGINE DEMO ===")

    engine = RecursiveGrammarEngine()

    # Rule inventory
    print(f"\nLoaded {len(engine.production_rules)} production rules:")
    for name, production in engine.production_rules.items():
        print(f"  {name}: {production.production_type.value} [{production.geometric_form}]")

    # Validation report
    report = engine.validate_grammar()
    print("\nGrammar Validation:")
    print(f"  Valid: {report['valid']}")
    print(f"  Errors: {len(report['errors'])}")
    print(f"  Warnings: {len(report['warnings'])}")
    print(f"  Statistics: {report['statistics']}")

    # Sample generation
    generated = engine.generate_language_sample("SYNTHOS_ROOT")
    print("\nGenerated Sample:")
    print(generated)

    # Whole-document parse
    document = "\n".join([
        '[[SYNTHOS]] v1.0 @ABCDEF12',
        'DEFINE SAMPLE_SYMBOL AS "test_value"',
        '[[/SYNTHOS]]',
    ])

    print("\nParsing test text:")
    print(document)

    root_tree = engine.parse(document)
    if not root_tree:
        print("Parse failed")
    else:
        print("\nParse Tree:")
        root_tree.print_tree()

        print("\nParse Tree (JSON):")
        import json
        print(json.dumps(root_tree.to_dict(), indent=2))

    # Single-rule parses
    print("\nTesting individual rule parsing:")

    statement_tree = engine.parse("DEFINE VARIABLE AS 42", "STATEMENT")
    if statement_tree:
        print(f"Statement parse successful: {statement_tree.matched_text}")
        statement_tree.print_tree()

    expression_tree = engine.parse("SAMPLE_SYMBOL", "EXPRESSION")
    if expression_tree:
        print(f"Expression parse successful: {expression_tree.matched_text}")

    # Visualisation is best-effort: any failure is reported and skipped.
    try:
        engine.visualize_grammar("synthos_grammar.png")
    except Exception as e:
        print(f"Visualization failed: {e}")

    # JSON export
    engine.export_grammar("synthos_grammar.json")

    print("\n=== RGE DEMO COMPLETE ===")
