Files
timmy-config/allegro/goap/planner.py
2026-03-31 20:02:01 +00:00

564 lines
18 KiB
Python
Executable File

#!/usr/bin/env python3
"""
GOAP Planner Module - Allegro-Primus Child Autonomy System
A* search-based planning for action sequences to achieve goals.
"""
import heapq
import json
import os
import time
from copy import deepcopy
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Dict, List, Optional, Set, Tuple, Any, Callable

from actions import Action, ActionLibrary, action_library
from goals import Goal, GoalManager, goal_manager
class PlanStatus(Enum):
    """Lifecycle status of a plan, from creation to a terminal state."""
    PENDING = auto()      # Created, execution not yet started
    IN_PROGRESS = auto()  # Execution underway
    COMPLETED = auto()    # Finished; also used when the goal was already satisfied at plan time
    FAILED = auto()       # Terminal failure (presumably set by the executor — not set in this module)
    CANCELLED = auto()    # Explicitly cancelled (presumably set by the executor — not set in this module)
@dataclass(order=True)
class PlanNode:
    """Node in the A* planning graph.

    Ordering (via ``order=True``) considers only ``f_score`` so nodes can
    be ranked by total estimated cost; identity for the visited set is
    based solely on the world state (see ``__hash__``/``__eq__``).
    """
    f_score: float  # Total cost (g + h)
    g_score: float = field(compare=False)  # Cost from start
    h_score: float = field(compare=False)  # Heuristic estimate to goal
    world_state: Dict = field(compare=False)
    action: Optional[Action] = field(compare=False, default=None)
    parent: Optional['PlanNode'] = field(compare=False, default=None)
    depth: int = field(compare=False, default=0)

    def _state_key(self) -> str:
        """Canonical JSON serialization of the world state (stable key order)."""
        return json.dumps(self.world_state, sort_keys=True, default=str)

    def __hash__(self):
        # Two nodes hash alike exactly when their world states serialize alike.
        return hash(self._state_key())

    def __eq__(self, other):
        if not isinstance(other, PlanNode):
            return False
        return self._state_key() == other._state_key()
@dataclass
class Plan:
    """An ordered sequence of actions intended to achieve a goal."""
    goal: Goal
    actions: List[Action]
    estimated_cost: float
    estimated_duration: float
    status: PlanStatus = PlanStatus.PENDING
    created_at: float = field(default_factory=time.time)
    started_at: Optional[float] = None
    completed_at: Optional[float] = None
    execution_trace: List[Dict] = field(default_factory=list)

    def __len__(self):
        """Number of actions in the plan."""
        return len(self.actions)

    @property
    def duration(self) -> float:
        """Actual wall-clock duration, or 0.0 unless both timestamps are set."""
        if not (self.completed_at and self.started_at):
            return 0.0
        return self.completed_at - self.started_at

    def to_dict(self) -> Dict:
        """Serialize plan metadata into a JSON-friendly dict."""
        summary = {
            'goal': self.goal.name,
            'action_count': len(self.actions),
            'actions': [step.name for step in self.actions],
            'estimated_cost': self.estimated_cost,
            'estimated_duration': self.estimated_duration,
            'status': self.status.name,
        }
        summary['created_at'] = self.created_at
        summary['started_at'] = self.started_at
        summary['completed_at'] = self.completed_at
        summary['duration'] = self.duration
        return summary
class HeuristicCalculator:
    """Calculates heuristic estimates for planning."""

    @staticmethod
    def distance_to_goal(world_state: Dict[str, Any], goal: "Goal") -> float:
        """
        Calculate estimated distance to goal.

        Returns a value between 0 (at goal) and 1 (far from goal),
        averaged over every key in the goal's desired state.

        Note: booleans are checked *before* numerics because ``bool`` is a
        subclass of ``int`` in Python; with the numeric branch first, a
        boolean target compared against a numeric actual would be scored
        as a partial numeric distance instead of the intended exact match.
        """
        desired = goal.get_desired_state()
        if not desired:
            return 0.0  # Nothing desired -> already "at" the goal
        total_distance = 0.0
        count = 0
        for key, target_value in desired.items():
            actual_value = HeuristicCalculator._get_nested_value(world_state, key)
            if actual_value is None:
                # Key missing entirely: maximum distance
                total_distance += 1.0
            elif isinstance(target_value, bool):
                # Boolean distance: exact match or nothing.
                # Must precede the numeric branch (bool is an int subclass).
                total_distance += 0.0 if actual_value == target_value else 1.0
            elif isinstance(target_value, (int, float)) and isinstance(actual_value, (int, float)):
                # Numeric distance, normalized by target magnitude and capped at 1
                if target_value != 0:
                    diff = abs(actual_value - target_value) / abs(target_value)
                    total_distance += min(1.0, diff)
                else:
                    total_distance += 0.0 if actual_value == 0 else 1.0
            elif isinstance(target_value, (list, str)):
                # String/list distance: exact match or nothing
                total_distance += 0.0 if actual_value == target_value else 1.0
            else:
                total_distance += 0.5  # Unknown type: assume halfway
            count += 1
        return total_distance / max(1, count)

    @staticmethod
    def _get_nested_value(d: Dict, key: str) -> Any:
        """Get a value from a nested dict using dot notation (e.g. 'system.cpu');
        returns None when any segment is missing or a non-dict is traversed."""
        keys = key.split('.')
        value = d
        for k in keys:
            if isinstance(value, dict):
                value = value.get(k)
            else:
                return None
        return value
class GOAPPlanner:
    """
    Goal-Oriented Action Planner using A* search.
    Finds optimal action sequences to achieve goals.

    Search is bounded three ways: ``max_depth`` (actions per plan),
    ``max_nodes`` (A* expansions per call) and ``timeout_seconds``
    (wall clock per call).
    """

    def __init__(
        self,
        action_library: Optional[ActionLibrary] = None,
        max_depth: int = 10,
        max_nodes: int = 1000,
        timeout_seconds: float = 5.0
    ):
        """
        Args:
            action_library: Source of candidate actions; a fresh library
                is created when None.
            max_depth: Maximum number of actions in a plan.
            max_nodes: Cap on A* node expansions per planning call.
            timeout_seconds: Wall-clock budget per planning call.
        """
        self.action_library = action_library or ActionLibrary()
        self.max_depth = max_depth
        self.max_nodes = max_nodes
        self.timeout_seconds = timeout_seconds
        self.heuristic = HeuristicCalculator()
        # Aggregate statistics across planning calls
        self.plans_created = 0
        self.plans_succeeded = 0
        self.average_planning_time = 0.0
        self.total_planning_time = 0.0

    def plan(
        self,
        goal: Goal,
        world_state: Dict[str, Any],
        forbidden_actions: Optional[Set[str]] = None
    ) -> Optional[Plan]:
        """
        Create a plan to achieve the goal from the current world state.
        Uses A* search to find an optimal action sequence.

        Args:
            goal: Goal to satisfy (considered satisfied at evaluate() >= 0.95).
            world_state: Current world state; deep-copied, never mutated.
            forbidden_actions: Action names excluded from the search.

        Returns:
            A Plan — empty and already COMPLETED when the goal holds up
            front — or None when no plan is found within the bounds.
        """
        start_time = time.time()
        forbidden = forbidden_actions or set()
        # Goal already satisfied: return an empty, completed plan.
        # (Statistics are intentionally not updated — no search happened.)
        if goal.evaluate(world_state) >= 0.95:
            return Plan(
                goal=goal,
                actions=[],
                estimated_cost=0.0,
                estimated_duration=0.0,
                status=PlanStatus.COMPLETED
            )
        # Initialize A* search; g is 0 at the start, so f == h.
        initial_h = self.heuristic.distance_to_goal(world_state, goal)
        initial_node = PlanNode(
            f_score=initial_h,
            g_score=0.0,
            h_score=initial_h,
            world_state=deepcopy(world_state),
            depth=0
        )
        # Priority queue entries are (f_score, counter, node); the counter
        # breaks f_score ties so heapq never has to compare nodes directly.
        counter = 0
        open_set = [(initial_node.f_score, counter, initial_node)]
        # Best-known g_score per visited world state (keyed by state hash)
        visited: Dict[int, float] = {hash(initial_node): 0.0}
        # For path reconstruction: state hash -> (predecessor hash, action)
        came_from: Dict[int, Tuple[int, Action]] = {}
        nodes_expanded = 0
        while open_set and nodes_expanded < self.max_nodes:
            # Respect the wall-clock budget
            if time.time() - start_time > self.timeout_seconds:
                break
            # Pop the node with the lowest f_score
            _, _, current = heapq.heappop(open_set)
            nodes_expanded += 1
            # Goal check for this state
            goal_satisfaction = goal.evaluate(current.world_state)
            if goal_satisfaction >= 0.95:
                plan = self._reconstruct_plan(
                    goal, current, came_from, initial_node
                )
                self._update_stats(start_time, True)
                return plan
            # Do not expand past the maximum plan length
            if current.depth >= self.max_depth:
                continue
            # Generate successors from every applicable action
            for action in self.action_library.get_all():
                if action.name in forbidden:
                    continue
                if not action.check_preconditions(current.world_state):
                    continue
                # Simulate the action's effects on a successor state
                new_state = action.apply_effects(current.world_state)
                g_score = current.g_score + action.cost
                h_score = self.heuristic.distance_to_goal(new_state, goal)
                f_score = g_score + h_score
                new_node = PlanNode(
                    f_score=f_score,
                    g_score=g_score,
                    h_score=h_score,
                    world_state=new_state,
                    action=action,
                    parent=current,
                    depth=current.depth + 1
                )
                node_hash = hash(new_node)
                # Skip states already reached as cheaply or cheaper
                if node_hash in visited and visited[node_hash] <= g_score:
                    continue
                visited[node_hash] = g_score
                came_from[node_hash] = (hash(current), action)
                counter += 1
                heapq.heappush(open_set, (f_score, counter, new_node))
        # Search exhausted (node cap, depth cap, or timeout): no plan
        self._update_stats(start_time, False)
        return None

    def _reconstruct_plan(
        self,
        goal: Goal,
        end_node: PlanNode,
        came_from: Dict[int, Tuple[int, Action]],
        start_node: PlanNode
    ) -> Plan:
        """Rebuild the action sequence by walking came_from backwards
        from the goal state's hash to the start state's hash."""
        actions = []
        current_hash = hash(end_node)
        start_hash = hash(start_node)
        total_cost = 0.0
        estimated_duration = 0.0
        while current_hash != start_hash:
            if current_hash not in came_from:
                # Broken chain (should not happen): return what we have
                break
            prev_hash, action = came_from[current_hash]
            actions.append(action)
            total_cost += action.cost
            # Rough estimate: 30 seconds of execution per action
            estimated_duration += 30.0
            current_hash = prev_hash
        actions.reverse()
        return Plan(
            goal=goal,
            actions=actions,
            estimated_cost=total_cost,
            estimated_duration=estimated_duration
        )

    def _update_stats(self, start_time: float, succeeded: bool):
        """Update aggregate planning statistics after one planning attempt."""
        elapsed = time.time() - start_time
        self.plans_created += 1
        self.total_planning_time += elapsed
        self.average_planning_time = self.total_planning_time / self.plans_created
        if succeeded:
            self.plans_succeeded += 1

    def plan_multi_goal(
        self,
        goals: List[Goal],
        world_state: Dict[str, Any],
        strategy: str = 'sequential'
    ) -> List[Plan]:
        """
        Plan for multiple goals.

        Strategies:
            'sequential'  - plan goals in descending effective priority,
                            simulating each plan's effects before planning
                            the next goal.
            'prioritized' - plan only for the single highest-priority goal.
            'interleaved' - NOTE(review): advertised but not implemented;
                            currently yields no plans.

        Returns:
            List of plans (possibly empty; unknown strategies yield []).
        """
        plans = []
        current_state = deepcopy(world_state)
        if strategy == 'sequential':
            for goal in sorted(goals, key=lambda g: g.state.effective_priority, reverse=True):
                plan = self.plan(goal, current_state)
                if plan:
                    plans.append(plan)
                    # Advance the simulated state as if the plan executed
                    for action in plan.actions:
                        current_state = action.apply_effects(current_state)
        elif strategy == 'prioritized':
            if goals:
                top_goal = max(goals, key=lambda g: g.state.effective_priority)
                plan = self.plan(top_goal, current_state)
                if plan:
                    plans.append(plan)
        return plans

    def replan(
        self,
        current_plan: Plan,
        world_state: Dict[str, Any],
        failed_action_index: int
    ) -> Optional[Plan]:
        """
        Replan from the current state after an action failure.

        Args:
            current_plan: Plan whose action failed.
            world_state: World state at the moment of failure.
            failed_action_index: Index of the failed action (currently
                unused — a full replan is attempted rather than salvaging
                the remaining actions).

        Returns:
            A fresh plan for the same goal, or None if replanning failed.
        """
        # Attempt a full replan for the same goal from the current state
        new_plan = self.plan(current_plan.goal, world_state)
        if new_plan:
            return new_plan
        # TODO: fall back to relaxed constraints or sub-goal planning
        return None

    def get_stats(self) -> Dict:
        """Return planner statistics and configuration as a plain dict."""
        return {
            'plans_created': self.plans_created,
            'plans_succeeded': self.plans_succeeded,
            'success_rate': self.plans_succeeded / max(1, self.plans_created),
            'average_planning_time': self.average_planning_time,
            'total_planning_time': self.total_planning_time,
            'max_depth': self.max_depth,
            'max_nodes': self.max_nodes,
            'timeout_seconds': self.timeout_seconds
        }
class PlanOptimizer:
    """Optimizes plans for better performance"""

    @staticmethod
    def remove_redundancies(plan: Plan) -> Plan:
        """Remove redundant actions from plan.

        An action is considered redundant when every effect it would
        apply is already present in the effects accumulated so far.
        """
        if not plan.actions:
            return plan
        kept = []
        accumulated = {}
        for step in plan.actions:
            already_applied = all(
                accumulated.get(key) == value
                for key, value in step.effects.items()
            )
            if not already_applied:
                kept.append(step)
            accumulated.update(step.effects)
        return Plan(
            goal=plan.goal,
            actions=kept,
            estimated_cost=sum(step.cost for step in kept),
            estimated_duration=len(kept) * 30.0
        )

    @staticmethod
    def parallelize_actions(plan: Plan) -> List[List[Action]]:
        """
        Identify actions that can be executed in parallel.
        Returns list of action groups.

        Actions are batched greedily: a step joins the current batch
        unless one of its preconditions overlaps an effect of any action
        already in the batch.
        """
        if not plan.actions:
            return []
        groups = []
        batch = []
        for step in plan.actions:
            conflicts = any(
                set(step.preconditions.keys()) & set(done.effects.keys())
                for done in batch
            )
            if batch and conflicts:
                groups.append(batch)
                batch = [step]
            else:
                batch.append(step)
        if batch:
            groups.append(batch)
        return groups
# =============================================================================
# PLAN LEARNING
# =============================================================================
class PlanLibrary:
    """Library of successful plans for reuse, persisted as JSON on disk."""

    def __init__(self, storage_path: Optional[str] = None):
        """
        Args:
            storage_path: JSON file backing the library; defaults to the
                Allegro GOAP data directory.
        """
        self.storage_path = storage_path or "/root/allegro/goap/plan_library.json"
        # goal_name -> list of plans recorded for that goal
        self.plans: Dict[str, List["Plan"]] = {}
        self._load()

    def _load(self):
        """Load plan library from disk (best-effort; starts empty on error)."""
        if os.path.exists(self.storage_path):
            try:
                with open(self.storage_path, 'r') as f:
                    json.load(f)
                # NOTE(review): plans are only serialized via to_dict();
                # deserialization back into Plan objects is not implemented,
                # so the in-memory library always starts empty.
                self.plans = {}
            except (OSError, ValueError):
                # Unreadable or corrupt library file (ValueError covers
                # JSONDecodeError/UnicodeDecodeError): start fresh rather
                # than failing, but no longer swallow unrelated exceptions
                # the way the previous bare `except:` did.
                self.plans = {}

    def save(self):
        """Save plan library to disk as JSON, one entry per goal name."""
        with open(self.storage_path, 'w') as f:
            json.dump({
                goal: [p.to_dict() for p in plans]
                for goal, plans in self.plans.items()
            }, f, indent=2)

    def add_plan(self, plan: "Plan"):
        """Add a successful plan to the library and persist immediately."""
        goal_name = plan.goal.name
        if goal_name not in self.plans:
            self.plans[goal_name] = []
        self.plans[goal_name].append(plan)
        self.save()

    def find_similar_plan(
        self,
        goal: "Goal",
        world_state: Dict[str, Any]
    ) -> Optional["Plan"]:
        """
        Find a previously successful plan for this goal.

        Args:
            goal: Goal to look up by name.
            world_state: Current world state (currently unused; similarity
                matching on the initial state is not yet implemented).

        Returns:
            The first COMPLETED plan recorded for the goal, or None.
        """
        if goal.name not in self.plans:
            return None
        # Simple heuristic: the first plan known to have achieved the goal.
        for plan in self.plans[goal.name]:
            if plan.status == PlanStatus.COMPLETED:
                return plan
        return None
# Singleton planner instance, created at import time and shared by importers.
planner = GOAPPlanner()
if __name__ == "__main__":
    # Smoke test: build a plan for a system-health goal from a synthetic
    # world state and print the result plus planner statistics.
    print("=== GOAP Planner Module Test ===")
    from goals import SystemHealthGoal
    # (The previously unused `from actions import CheckSystemHealth,
    # CleanupResources` was removed; the `actions` module is already
    # imported at the top of this file, so nothing is lost.)

    # Create test goal and state
    goal = SystemHealthGoal()
    world_state = {
        'system': {
            'cpu_percent': 45,
            'memory_percent': 60,
            'disk_percent': 85,  # High disk usage - needs cleanup
            'uptime_hours': 48,
            'recent_errors': 2,
            'health_checked': False
        }
    }
    print(f"\nGoal: {goal.name}")
    print(f"Initial satisfaction: {goal.evaluate(world_state):.2f}")
    print(f"Desired state: {goal.get_desired_state()}")

    # Create plan
    print("\n=== Planning ===")
    start = time.time()
    plan = planner.plan(goal, world_state)
    elapsed = time.time() - start
    if plan:
        print(f"Plan found in {elapsed:.3f}s")
        print(f"Actions: {len(plan.actions)}")
        print(f"Estimated cost: {plan.estimated_cost}")
        for i, action in enumerate(plan.actions):
            print(f" {i+1}. {action.name}")
    else:
        print("No plan found")
    print("\n=== Planner Stats ===")
    print(json.dumps(planner.get_stats(), indent=2))