Files
timmy-config/allegro/goap/planner.py
2026-03-31 20:02:01 +00:00

564 lines
18 KiB
Python
Executable File

#!/usr/bin/env python3
"""
GOAP Planner Module - Allegro-Primus Child Autonomy System
A* search-based planning for action sequences to achieve goals.
"""
import heapq
import json
import os
import time
from copy import deepcopy
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Dict, List, Optional, Set, Tuple, Any, Callable

from actions import Action, ActionLibrary, action_library
from goals import Goal, GoalManager, goal_manager
class PlanStatus(Enum):
    """Lifecycle status of a plan, from creation to a terminal state."""
    PENDING = auto()      # Created, execution not yet started
    IN_PROGRESS = auto()  # Execution underway
    COMPLETED = auto()    # Finished; also used when the goal was already satisfied at plan time
    FAILED = auto()       # Terminal failure (presumably set by the executor — not set in this module)
    CANCELLED = auto()    # Explicitly cancelled (presumably set by the executor — not set in this module)
@dataclass(order=True)
class PlanNode:
    """Node in the A* planning graph.

    Ordering (via ``order=True``) considers only ``f_score`` so nodes can
    be ranked by total estimated cost; identity for the visited set is
    based solely on the world state (see ``__hash__``/``__eq__``).
    """
    f_score: float  # Total cost (g + h)
    g_score: float = field(compare=False)  # Cost from start
    h_score: float = field(compare=False)  # Heuristic estimate to goal
    world_state: Dict = field(compare=False)
    action: Optional[Action] = field(compare=False, default=None)
    parent: Optional['PlanNode'] = field(compare=False, default=None)
    depth: int = field(compare=False, default=0)

    def _state_key(self) -> str:
        """Canonical JSON serialization of the world state (stable key order)."""
        return json.dumps(self.world_state, sort_keys=True, default=str)

    def __hash__(self):
        # Two nodes hash alike exactly when their world states serialize alike.
        return hash(self._state_key())

    def __eq__(self, other):
        if not isinstance(other, PlanNode):
            return False
        return self._state_key() == other._state_key()
@dataclass
class Plan:
    """An ordered sequence of actions intended to achieve a goal."""
    goal: Goal
    actions: List[Action]
    estimated_cost: float
    estimated_duration: float
    status: PlanStatus = PlanStatus.PENDING
    created_at: float = field(default_factory=time.time)
    started_at: Optional[float] = None
    completed_at: Optional[float] = None
    execution_trace: List[Dict] = field(default_factory=list)

    def __len__(self):
        """Number of actions in the plan."""
        return len(self.actions)

    @property
    def duration(self) -> float:
        """Actual wall-clock duration, or 0.0 unless both timestamps are set."""
        if not (self.completed_at and self.started_at):
            return 0.0
        return self.completed_at - self.started_at

    def to_dict(self) -> Dict:
        """Serialize plan metadata into a JSON-friendly dict."""
        summary = {
            'goal': self.goal.name,
            'action_count': len(self.actions),
            'actions': [step.name for step in self.actions],
            'estimated_cost': self.estimated_cost,
            'estimated_duration': self.estimated_duration,
            'status': self.status.name,
        }
        summary['created_at'] = self.created_at
        summary['started_at'] = self.started_at
        summary['completed_at'] = self.completed_at
        summary['duration'] = self.duration
        return summary
class HeuristicCalculator:
    """Calculates heuristic estimates for planning."""

    @staticmethod
    def distance_to_goal(world_state: Dict[str, Any], goal: "Goal") -> float:
        """
        Calculate estimated distance to goal.

        Returns a value between 0 (at goal) and 1 (far from goal),
        averaged over every key in the goal's desired state.

        Note: booleans are checked *before* numerics because ``bool`` is a
        subclass of ``int`` in Python; with the numeric branch first, a
        boolean target compared against a numeric actual would be scored
        as a partial numeric distance instead of the intended exact match.
        """
        desired = goal.get_desired_state()
        if not desired:
            return 0.0  # Nothing desired -> already "at" the goal
        total_distance = 0.0
        count = 0
        for key, target_value in desired.items():
            actual_value = HeuristicCalculator._get_nested_value(world_state, key)
            if actual_value is None:
                # Key missing entirely: maximum distance
                total_distance += 1.0
            elif isinstance(target_value, bool):
                # Boolean distance: exact match or nothing.
                # Must precede the numeric branch (bool is an int subclass).
                total_distance += 0.0 if actual_value == target_value else 1.0
            elif isinstance(target_value, (int, float)) and isinstance(actual_value, (int, float)):
                # Numeric distance, normalized by target magnitude and capped at 1
                if target_value != 0:
                    diff = abs(actual_value - target_value) / abs(target_value)
                    total_distance += min(1.0, diff)
                else:
                    total_distance += 0.0 if actual_value == 0 else 1.0
            elif isinstance(target_value, (list, str)):
                # String/list distance: exact match or nothing
                total_distance += 0.0 if actual_value == target_value else 1.0
            else:
                total_distance += 0.5  # Unknown type: assume halfway
            count += 1
        return total_distance / max(1, count)

    @staticmethod
    def _get_nested_value(d: Dict, key: str) -> Any:
        """Get a value from a nested dict using dot notation (e.g. 'system.cpu');
        returns None when any segment is missing or a non-dict is traversed."""
        keys = key.split('.')
        value = d
        for k in keys:
            if isinstance(value, dict):
                value = value.get(k)
            else:
                return None
        return value
class GOAPPlanner:
    """
    Goal-Oriented Action Planner using A* search.
    Finds optimal action sequences to achieve goals.

    Search is bounded three ways: ``max_depth`` (actions per plan),
    ``max_nodes`` (A* expansions per call) and ``timeout_seconds``
    (wall clock per call).
    """

    def __init__(
        self,
        action_library: Optional[ActionLibrary] = None,
        max_depth: int = 10,
        max_nodes: int = 1000,
        timeout_seconds: float = 5.0
    ):
        """
        Args:
            action_library: Source of candidate actions; a fresh library
                is created when None.
            max_depth: Maximum number of actions in a plan.
            max_nodes: Cap on A* node expansions per planning call.
            timeout_seconds: Wall-clock budget per planning call.
        """
        self.action_library = action_library or ActionLibrary()
        self.max_depth = max_depth
        self.max_nodes = max_nodes
        self.timeout_seconds = timeout_seconds
        self.heuristic = HeuristicCalculator()
        # Aggregate statistics across planning calls
        self.plans_created = 0
        self.plans_succeeded = 0
        self.average_planning_time = 0.0
        self.total_planning_time = 0.0

    def plan(
        self,
        goal: Goal,
        world_state: Dict[str, Any],
        forbidden_actions: Optional[Set[str]] = None
    ) -> Optional[Plan]:
        """
        Create a plan to achieve the goal from the current world state.
        Uses A* search to find an optimal action sequence.

        Args:
            goal: Goal to satisfy (considered satisfied at evaluate() >= 0.95).
            world_state: Current world state; deep-copied, never mutated.
            forbidden_actions: Action names excluded from the search.

        Returns:
            A Plan — empty and already COMPLETED when the goal holds up
            front — or None when no plan is found within the bounds.
        """
        start_time = time.time()
        forbidden = forbidden_actions or set()
        # Goal already satisfied: return an empty, completed plan.
        # (Statistics are intentionally not updated — no search happened.)
        if goal.evaluate(world_state) >= 0.95:
            return Plan(
                goal=goal,
                actions=[],
                estimated_cost=0.0,
                estimated_duration=0.0,
                status=PlanStatus.COMPLETED
            )
        # Initialize A* search; g is 0 at the start, so f == h.
        initial_h = self.heuristic.distance_to_goal(world_state, goal)
        initial_node = PlanNode(
            f_score=initial_h,
            g_score=0.0,
            h_score=initial_h,
            world_state=deepcopy(world_state),
            depth=0
        )
        # Priority queue entries are (f_score, counter, node); the counter
        # breaks f_score ties so heapq never has to compare nodes directly.
        counter = 0
        open_set = [(initial_node.f_score, counter, initial_node)]
        # Best-known g_score per visited world state (keyed by state hash)
        visited: Dict[int, float] = {hash(initial_node): 0.0}
        # For path reconstruction: state hash -> (predecessor hash, action)
        came_from: Dict[int, Tuple[int, Action]] = {}
        nodes_expanded = 0
        while open_set and nodes_expanded < self.max_nodes:
            # Respect the wall-clock budget
            if time.time() - start_time > self.timeout_seconds:
                break
            # Pop the node with the lowest f_score
            _, _, current = heapq.heappop(open_set)
            nodes_expanded += 1
            # Goal check for this state
            goal_satisfaction = goal.evaluate(current.world_state)
            if goal_satisfaction >= 0.95:
                plan = self._reconstruct_plan(
                    goal, current, came_from, initial_node
                )
                self._update_stats(start_time, True)
                return plan
            # Do not expand past the maximum plan length
            if current.depth >= self.max_depth:
                continue
            # Generate successors from every applicable action
            for action in self.action_library.get_all():
                if action.name in forbidden:
                    continue
                if not action.check_preconditions(current.world_state):
                    continue
                # Simulate the action's effects on a successor state
                new_state = action.apply_effects(current.world_state)
                g_score = current.g_score + action.cost
                h_score = self.heuristic.distance_to_goal(new_state, goal)
                f_score = g_score + h_score
                new_node = PlanNode(
                    f_score=f_score,
                    g_score=g_score,
                    h_score=h_score,
                    world_state=new_state,
                    action=action,
                    parent=current,
                    depth=current.depth + 1
                )
                node_hash = hash(new_node)
                # Skip states already reached as cheaply or cheaper
                if node_hash in visited and visited[node_hash] <= g_score:
                    continue
                visited[node_hash] = g_score
                came_from[node_hash] = (hash(current), action)
                counter += 1
                heapq.heappush(open_set, (f_score, counter, new_node))
        # Search exhausted (node cap, depth cap, or timeout): no plan
        self._update_stats(start_time, False)
        return None

    def _reconstruct_plan(
        self,
        goal: Goal,
        end_node: PlanNode,
        came_from: Dict[int, Tuple[int, Action]],
        start_node: PlanNode
    ) -> Plan:
        """Rebuild the action sequence by walking came_from backwards
        from the goal state's hash to the start state's hash."""
        actions = []
        current_hash = hash(end_node)
        start_hash = hash(start_node)
        total_cost = 0.0
        estimated_duration = 0.0
        while current_hash != start_hash:
            if current_hash not in came_from:
                # Broken chain (should not happen): return what we have
                break
            prev_hash, action = came_from[current_hash]
            actions.append(action)
            total_cost += action.cost
            # Rough estimate: 30 seconds of execution per action
            estimated_duration += 30.0
            current_hash = prev_hash
        actions.reverse()
        return Plan(
            goal=goal,
            actions=actions,
            estimated_cost=total_cost,
            estimated_duration=estimated_duration
        )

    def _update_stats(self, start_time: float, succeeded: bool):
        """Update aggregate planning statistics after one planning attempt."""
        elapsed = time.time() - start_time
        self.plans_created += 1
        self.total_planning_time += elapsed
        self.average_planning_time = self.total_planning_time / self.plans_created
        if succeeded:
            self.plans_succeeded += 1

    def plan_multi_goal(
        self,
        goals: List[Goal],
        world_state: Dict[str, Any],
        strategy: str = 'sequential'
    ) -> List[Plan]:
        """
        Plan for multiple goals.

        Strategies:
            'sequential'  - plan goals in descending effective priority,
                            simulating each plan's effects before planning
                            the next goal.
            'prioritized' - plan only for the single highest-priority goal.
            'interleaved' - NOTE(review): advertised but not implemented;
                            currently yields no plans.

        Returns:
            List of plans (possibly empty; unknown strategies yield []).
        """
        plans = []
        current_state = deepcopy(world_state)
        if strategy == 'sequential':
            for goal in sorted(goals, key=lambda g: g.state.effective_priority, reverse=True):
                plan = self.plan(goal, current_state)
                if plan:
                    plans.append(plan)
                    # Advance the simulated state as if the plan executed
                    for action in plan.actions:
                        current_state = action.apply_effects(current_state)
        elif strategy == 'prioritized':
            if goals:
                top_goal = max(goals, key=lambda g: g.state.effective_priority)
                plan = self.plan(top_goal, current_state)
                if plan:
                    plans.append(plan)
        return plans

    def replan(
        self,
        current_plan: Plan,
        world_state: Dict[str, Any],
        failed_action_index: int
    ) -> Optional[Plan]:
        """
        Replan from the current state after an action failure.

        Args:
            current_plan: Plan whose action failed.
            world_state: World state at the moment of failure.
            failed_action_index: Index of the failed action (currently
                unused — a full replan is attempted rather than salvaging
                the remaining actions).

        Returns:
            A fresh plan for the same goal, or None if replanning failed.
        """
        # Attempt a full replan for the same goal from the current state
        new_plan = self.plan(current_plan.goal, world_state)
        if new_plan:
            return new_plan
        # TODO: fall back to relaxed constraints or sub-goal planning
        return None

    def get_stats(self) -> Dict:
        """Return planner statistics and configuration as a plain dict."""
        return {
            'plans_created': self.plans_created,
            'plans_succeeded': self.plans_succeeded,
            'success_rate': self.plans_succeeded / max(1, self.plans_created),
            'average_planning_time': self.average_planning_time,
            'total_planning_time': self.total_planning_time,
            'max_depth': self.max_depth,
            'max_nodes': self.max_nodes,
            'timeout_seconds': self.timeout_seconds
        }
class PlanOptimizer:
    """Optimizes plans for better performance"""

    @staticmethod
    def remove_redundancies(plan: Plan) -> Plan:
        """Remove redundant actions from plan.

        An action is considered redundant when every effect it would
        apply is already present in the effects accumulated so far.
        """
        if not plan.actions:
            return plan
        kept = []
        accumulated = {}
        for step in plan.actions:
            already_applied = all(
                accumulated.get(key) == value
                for key, value in step.effects.items()
            )
            if not already_applied:
                kept.append(step)
            accumulated.update(step.effects)
        return Plan(
            goal=plan.goal,
            actions=kept,
            estimated_cost=sum(step.cost for step in kept),
            estimated_duration=len(kept) * 30.0
        )

    @staticmethod
    def parallelize_actions(plan: Plan) -> List[List[Action]]:
        """
        Identify actions that can be executed in parallel.
        Returns list of action groups.

        Actions are batched greedily: a step joins the current batch
        unless one of its preconditions overlaps an effect of any action
        already in the batch.
        """
        if not plan.actions:
            return []
        groups = []
        batch = []
        for step in plan.actions:
            conflicts = any(
                set(step.preconditions.keys()) & set(done.effects.keys())
                for done in batch
            )
            if batch and conflicts:
                groups.append(batch)
                batch = [step]
            else:
                batch.append(step)
        if batch:
            groups.append(batch)
        return groups
# =============================================================================
# PLAN LEARNING
# =============================================================================
class PlanLibrary:
    """Library of successful plans for reuse, persisted as JSON on disk."""

    def __init__(self, storage_path: Optional[str] = None):
        """
        Args:
            storage_path: JSON file backing the library; defaults to the
                Allegro GOAP data directory.
        """
        self.storage_path = storage_path or "/root/allegro/goap/plan_library.json"
        # goal_name -> list of plans recorded for that goal
        self.plans: Dict[str, List["Plan"]] = {}
        self._load()

    def _load(self):
        """Load plan library from disk (best-effort; starts empty on error)."""
        if os.path.exists(self.storage_path):
            try:
                with open(self.storage_path, 'r') as f:
                    json.load(f)
                # NOTE(review): plans are only serialized via to_dict();
                # deserialization back into Plan objects is not implemented,
                # so the in-memory library always starts empty.
                self.plans = {}
            except (OSError, ValueError):
                # Unreadable or corrupt library file (ValueError covers
                # JSONDecodeError/UnicodeDecodeError): start fresh rather
                # than failing, but no longer swallow unrelated exceptions
                # the way the previous bare `except:` did.
                self.plans = {}

    def save(self):
        """Save plan library to disk as JSON, one entry per goal name."""
        with open(self.storage_path, 'w') as f:
            json.dump({
                goal: [p.to_dict() for p in plans]
                for goal, plans in self.plans.items()
            }, f, indent=2)

    def add_plan(self, plan: "Plan"):
        """Add a successful plan to the library and persist immediately."""
        goal_name = plan.goal.name
        if goal_name not in self.plans:
            self.plans[goal_name] = []
        self.plans[goal_name].append(plan)
        self.save()

    def find_similar_plan(
        self,
        goal: "Goal",
        world_state: Dict[str, Any]
    ) -> Optional["Plan"]:
        """
        Find a previously successful plan for this goal.

        Args:
            goal: Goal to look up by name.
            world_state: Current world state (currently unused; similarity
                matching on the initial state is not yet implemented).

        Returns:
            The first COMPLETED plan recorded for the goal, or None.
        """
        if goal.name not in self.plans:
            return None
        # Simple heuristic: the first plan known to have achieved the goal.
        for plan in self.plans[goal.name]:
            if plan.status == PlanStatus.COMPLETED:
                return plan
        return None
# Singleton planner instance, created at import time and shared by importers.
planner = GOAPPlanner()
if __name__ == "__main__":
    # Smoke test: build a plan for a system-health goal from a synthetic
    # world state and print the result plus planner statistics.
    print("=== GOAP Planner Module Test ===")
    from goals import SystemHealthGoal
    # (The previously unused `from actions import CheckSystemHealth,
    # CleanupResources` was removed; the `actions` module is already
    # imported at the top of this file, so nothing is lost.)

    # Create test goal and state
    goal = SystemHealthGoal()
    world_state = {
        'system': {
            'cpu_percent': 45,
            'memory_percent': 60,
            'disk_percent': 85,  # High disk usage - needs cleanup
            'uptime_hours': 48,
            'recent_errors': 2,
            'health_checked': False
        }
    }
    print(f"\nGoal: {goal.name}")
    print(f"Initial satisfaction: {goal.evaluate(world_state):.2f}")
    print(f"Desired state: {goal.get_desired_state()}")

    # Create plan
    print("\n=== Planning ===")
    start = time.time()
    plan = planner.plan(goal, world_state)
    elapsed = time.time() - start
    if plan:
        print(f"Plan found in {elapsed:.3f}s")
        print(f"Actions: {len(plan.actions)}")
        print(f"Estimated cost: {plan.estimated_cost}")
        for i, action in enumerate(plan.actions):
            print(f" {i+1}. {action.name}")
    else:
        print("No plan found")
    print("\n=== Planner Stats ===")
    print(json.dumps(planner.get_stats(), indent=2))