#!/usr/bin/env python3
"""
GOAP Planner Module - Allegro-Primus Child Autonomy System

A* search-based planning for action sequences to achieve goals.
"""

import heapq
import time
import json
from typing import Dict, List, Optional, Set, Tuple, Any, Callable
from dataclasses import dataclass, field
from copy import deepcopy
from enum import Enum, auto

# Project-local modules: action catalogue and goal definitions.
from actions import Action, ActionLibrary, action_library
from goals import Goal, GoalManager, goal_manager
class PlanStatus(Enum):
    """Lifecycle status of a plan."""
    PENDING = auto()      # created, execution not yet started
    IN_PROGRESS = auto()  # currently being executed
    COMPLETED = auto()    # finished successfully
    FAILED = auto()       # execution failed
    CANCELLED = auto()    # aborted before completion
@dataclass(order=True)
class PlanNode:
    """A node in the A* planning graph.

    Ordering (used by the priority queue) considers only ``f_score``;
    identity and hashing derive from a canonical JSON rendering of the
    world state, so equivalent states collapse in the visited set.
    """
    f_score: float  # Total cost (g + h)
    g_score: float = field(compare=False)  # Cost from start
    h_score: float = field(compare=False)  # Heuristic estimate to goal
    world_state: Dict = field(compare=False)
    action: Optional[Action] = field(compare=False, default=None)
    parent: Optional['PlanNode'] = field(compare=False, default=None)
    depth: int = field(compare=False, default=0)

    def _state_key(self) -> str:
        """Canonical string form of the world state (stable key order)."""
        return json.dumps(self.world_state, sort_keys=True, default=str)

    def __hash__(self):
        # Hash on the canonical state so the visited set keys by state.
        return hash(self._state_key())

    def __eq__(self, other):
        if not isinstance(other, PlanNode):
            return False
        return self._state_key() == other._state_key()
@dataclass
class Plan:
    """A concrete sequence of actions intended to achieve a goal."""
    goal: Goal
    actions: List[Action]
    estimated_cost: float
    estimated_duration: float
    status: PlanStatus = PlanStatus.PENDING
    created_at: float = field(default_factory=time.time)
    started_at: Optional[float] = None
    completed_at: Optional[float] = None
    execution_trace: List[Dict] = field(default_factory=list)

    def __len__(self):
        return len(self.actions)

    @property
    def duration(self) -> float:
        """Wall-clock execution time; 0.0 unless both timestamps are set."""
        if not (self.completed_at and self.started_at):
            return 0.0
        return self.completed_at - self.started_at

    def to_dict(self) -> Dict:
        """Serializable summary of this plan."""
        summary = {
            'goal': self.goal.name,
            'action_count': len(self.actions),
            'actions': [a.name for a in self.actions],
            'estimated_cost': self.estimated_cost,
            'estimated_duration': self.estimated_duration,
            'status': self.status.name,
        }
        summary['created_at'] = self.created_at
        summary['started_at'] = self.started_at
        summary['completed_at'] = self.completed_at
        summary['duration'] = self.duration
        return summary
class HeuristicCalculator:
    """Calculates heuristic estimates for planning"""

    @staticmethod
    def distance_to_goal(world_state: Dict[str, Any], goal: 'Goal') -> float:
        """
        Calculate estimated distance to goal.

        Averages a per-key distance in [0, 1] over the goal's desired
        state. Returns 0.0 when the state matches the goal (or the goal
        has no desired state) and 1.0 when maximally far from it.
        """
        desired = goal.get_desired_state()
        if not desired:
            return 0.0

        total_distance = 0.0
        count = 0

        for key, target_value in desired.items():
            actual_value = HeuristicCalculator._get_nested_value(world_state, key)

            if actual_value is None:
                # Missing value is fully distant -- unless the goal itself
                # wants the key to be None/absent, which is a match.
                total_distance += 0.0 if target_value is None else 1.0
            elif isinstance(target_value, bool):
                # Booleans must be tested BEFORE numbers: bool is a subclass
                # of int, so the numeric branch would otherwise swallow them
                # and score boolean goals by numeric distance.
                total_distance += 0.0 if actual_value == target_value else 1.0
            elif isinstance(target_value, (int, float)) and isinstance(actual_value, (int, float)):
                # Numeric distance, normalized by the target's magnitude.
                if target_value != 0:
                    diff = abs(actual_value - target_value) / abs(target_value)
                    total_distance += min(1.0, diff)
                else:
                    total_distance += 0.0 if actual_value == 0 else 1.0
            elif isinstance(target_value, (list, str)):
                # String/list distance: exact match or fully distant.
                total_distance += 0.0 if actual_value == target_value else 1.0
            else:
                total_distance += 0.5  # Unknown type: assume halfway

            count += 1

        return total_distance / max(1, count)

    @staticmethod
    def _get_nested_value(d: Dict, key: str) -> Any:
        """Get value from nested dict using dot notation (None if missing)."""
        value = d
        for k in key.split('.'):
            if isinstance(value, dict):
                value = value.get(k)
            else:
                return None
        return value
class GOAPPlanner:
    """
    Goal-Oriented Action Planner using A* search.
    Finds optimal action sequences to achieve goals.
    """

    def __init__(
        self,
        action_library: ActionLibrary = None,
        max_depth: int = 10,
        max_nodes: int = 1000,
        timeout_seconds: float = 5.0
    ):
        # NOTE(review): the `action_library` parameter shadows the module-level
        # `action_library` singleton imported from actions; with no argument a
        # fresh (empty) ActionLibrary is built, NOT the singleton -- confirm
        # this is intended.
        self.action_library = action_library or ActionLibrary()
        self.max_depth = max_depth              # max actions in any plan
        self.max_nodes = max_nodes              # cap on A* node expansions
        self.timeout_seconds = timeout_seconds  # wall-clock planning budget
        self.heuristic = HeuristicCalculator()

        # Statistics
        self.plans_created = 0
        self.plans_succeeded = 0
        self.average_planning_time = 0.0
        self.total_planning_time = 0.0

    def plan(
        self,
        goal: Goal,
        world_state: Dict[str, Any],
        forbidden_actions: Optional[Set[str]] = None
    ) -> Optional[Plan]:
        """
        Create a plan to achieve the goal from the current world state.
        Uses A* search to find optimal action sequence.

        Returns an empty COMPLETED plan if the goal is already satisfied,
        a reconstructed Plan on success, or None when no plan is found
        within the depth/node/time budget.
        """
        start_time = time.time()
        forbidden = forbidden_actions or set()

        # Check if goal is already satisfied (0.95 = "close enough" threshold,
        # also used as the termination test below)
        if goal.evaluate(world_state) >= 0.95:
            return Plan(
                goal=goal,
                actions=[],
                estimated_cost=0.0,
                estimated_duration=0.0,
                status=PlanStatus.COMPLETED
            )

        # Initialize A* search: g = 0 at the start, so f == h.
        initial_node = PlanNode(
            f_score=self.heuristic.distance_to_goal(world_state, goal),
            g_score=0.0,
            h_score=self.heuristic.distance_to_goal(world_state, goal),
            world_state=deepcopy(world_state),
            depth=0
        )

        # Priority queue: (f_score, counter, node). The monotonically
        # increasing counter breaks f_score ties so heapq never has to
        # compare two PlanNodes directly.
        counter = 0
        open_set = [(initial_node.f_score, counter, initial_node)]

        # Visited states with best g_score, keyed by the node's state hash.
        # NOTE(review): a hash collision between distinct world states would
        # merge them here and in came_from -- considered acceptable in practice.
        visited: Dict[int, float] = {hash(initial_node): 0.0}

        # For path reconstruction: state hash -> (predecessor hash, action)
        came_from: Dict[int, Tuple[int, Action]] = {}

        nodes_expanded = 0

        while open_set and nodes_expanded < self.max_nodes:
            # Check timeout
            if time.time() - start_time > self.timeout_seconds:
                break

            # Get node with lowest f_score
            _, _, current = heapq.heappop(open_set)
            nodes_expanded += 1

            # Check if goal is satisfied in this state
            goal_satisfaction = goal.evaluate(current.world_state)
            if goal_satisfaction >= 0.95:
                # Reconstruct plan
                plan = self._reconstruct_plan(
                    goal, current, came_from, initial_node
                )
                self._update_stats(start_time, True)
                return plan

            # Expand node if not at max depth
            if current.depth >= self.max_depth:
                continue

            # Generate successors: try every action in the library
            for action in self.action_library.get_all():
                # Skip forbidden actions
                if action.name in forbidden:
                    continue

                # Skip if preconditions not met
                if not action.check_preconditions(current.world_state):
                    continue

                # Apply action effects (yields the successor state)
                new_state = action.apply_effects(current.world_state)

                # Create new node
                g_score = current.g_score + action.cost
                h_score = self.heuristic.distance_to_goal(new_state, goal)
                f_score = g_score + h_score

                new_node = PlanNode(
                    f_score=f_score,
                    g_score=g_score,
                    h_score=h_score,
                    world_state=new_state,
                    action=action,
                    parent=current,
                    depth=current.depth + 1
                )

                node_hash = hash(new_node)

                # Check if we've seen this state with a better path
                if node_hash in visited and visited[node_hash] <= g_score:
                    continue

                visited[node_hash] = g_score
                came_from[node_hash] = (hash(current), action)

                counter += 1
                heapq.heappush(open_set, (f_score, counter, new_node))

        # Planning failed: budget exhausted or search space empty
        self._update_stats(start_time, False)
        return None

    def _reconstruct_plan(
        self,
        goal: Goal,
        end_node: PlanNode,
        came_from: Dict[int, Tuple[int, Action]],
        start_node: PlanNode
    ) -> Plan:
        """Reconstruct action sequence from came_from map.

        Walks predecessor links (by state hash) from the end node back to
        the start node, then reverses the collected actions.
        """
        actions = []
        current_hash = hash(end_node)
        start_hash = hash(start_node)

        total_cost = 0.0
        estimated_duration = 0.0

        while current_hash != start_hash:
            # Defensive stop: a missing link yields a truncated plan rather
            # than an infinite loop.
            if current_hash not in came_from:
                break

            prev_hash, action = came_from[current_hash]
            actions.append(action)
            total_cost += action.cost
            # Estimate 30 seconds per action
            estimated_duration += 30.0
            current_hash = prev_hash

        actions.reverse()

        return Plan(
            goal=goal,
            actions=actions,
            estimated_cost=total_cost,
            estimated_duration=estimated_duration
        )

    def _update_stats(self, start_time: float, succeeded: bool):
        """Update planning statistics (counts and running average time)."""
        elapsed = time.time() - start_time
        self.plans_created += 1
        self.total_planning_time += elapsed
        self.average_planning_time = self.total_planning_time / self.plans_created
        if succeeded:
            self.plans_succeeded += 1

    def plan_multi_goal(
        self,
        goals: List[Goal],
        world_state: Dict[str, Any],
        strategy: str = 'sequential'
    ) -> List[Plan]:
        """
        Plan for multiple goals.

        Strategies: 'sequential', 'interleaved', 'prioritized'

        NOTE(review): 'interleaved' is listed but has no branch below --
        passing it (or any unknown strategy) returns an empty list.
        """
        plans = []
        current_state = deepcopy(world_state)

        if strategy == 'sequential':
            # Plan for each goal in sequence, highest priority first
            for goal in sorted(goals, key=lambda g: g.state.effective_priority, reverse=True):
                plan = self.plan(goal, current_state)
                if plan:
                    plans.append(plan)
                    # Update state as if plan was executed
                    for action in plan.actions:
                        current_state = action.apply_effects(current_state)

        elif strategy == 'prioritized':
            # Plan only for highest priority goal
            if goals:
                top_goal = max(goals, key=lambda g: g.state.effective_priority)
                plan = self.plan(top_goal, current_state)
                if plan:
                    plans.append(plan)

        return plans

    def replan(
        self,
        current_plan: Plan,
        world_state: Dict[str, Any],
        failed_action_index: int
    ) -> Optional[Plan]:
        """
        Replan from current state after an action failure.
        """
        # Get remaining actions after failed action
        # NOTE(review): remaining_actions is computed but never used --
        # presumably intended for a plan-repair path that was not written.
        remaining_actions = current_plan.actions[failed_action_index + 1:]

        # Try to plan from current state
        new_plan = self.plan(current_plan.goal, world_state)

        if new_plan:
            return new_plan

        # If planning failed, try with relaxed constraints
        # or plan for sub-goals
        return None

    def get_stats(self) -> Dict:
        """Get planner statistics"""
        return {
            'plans_created': self.plans_created,
            'plans_succeeded': self.plans_succeeded,
            'success_rate': self.plans_succeeded / max(1, self.plans_created),
            'average_planning_time': self.average_planning_time,
            'total_planning_time': self.total_planning_time,
            'max_depth': self.max_depth,
            'max_nodes': self.max_nodes,
            'timeout_seconds': self.timeout_seconds
        }
|
class PlanOptimizer:
    """Optimizes plans for better performance"""

    @staticmethod
    def remove_redundancies(plan: 'Plan') -> 'Plan':
        """Remove redundant actions from plan.

        Walks the plan in order, tracking the cumulative effects applied so
        far; an action is dropped when every one of its effects already holds
        the target value (an action with no effects is treated as a no-op and
        dropped). Returns a new Plan with recomputed cost/duration.
        """
        if not plan.actions:
            return plan

        optimized = []
        applied_effects: Dict[str, Any] = {}

        for action in plan.actions:
            # Redundant iff all effect keys already hold their target values.
            redundant = all(
                applied_effects.get(key) == value
                for key, value in action.effects.items()
            )
            if not redundant:
                optimized.append(action)
            applied_effects.update(action.effects)

        return Plan(
            goal=plan.goal,
            actions=optimized,
            estimated_cost=sum(a.cost for a in optimized),
            # Same 30s-per-action estimate used by the planner.
            estimated_duration=len(optimized) * 30.0
        )

    @staticmethod
    def parallelize_actions(plan: 'Plan') -> 'List[List[Action]]':
        """
        Identify actions that can be executed in parallel.

        Two actions conflict when one's effects intersect the other's
        preconditions (true/anti dependence) or when both write the same
        effect key (write-write hazard) -- the original check covered only
        true dependence, wrongly parallelizing order-dependent actions.
        Returns a list of action groups: groups run in order, members of a
        group may run concurrently.
        """
        if not plan.actions:
            return []

        parallel_groups = []
        current_group = []

        for action in plan.actions:
            if not current_group:
                current_group.append(action)
                continue

            # Independent iff no dependence hazard against any grouped action.
            independent = True
            pre_keys = set(action.preconditions.keys())
            eff_keys = set(action.effects.keys())
            for grouped_action in current_group:
                g_pre = set(grouped_action.preconditions.keys())
                g_eff = set(grouped_action.effects.keys())
                # True dependence, anti-dependence, or write-write conflict.
                if (pre_keys & g_eff) or (eff_keys & g_pre) or (eff_keys & g_eff):
                    independent = False
                    break

            if independent:
                current_group.append(action)
            else:
                parallel_groups.append(current_group)
                current_group = [action]

        if current_group:
            parallel_groups.append(current_group)

        return parallel_groups
# =============================================================================
# PLAN LEARNING
# =============================================================================

class PlanLibrary:
    """Library of successful plans for reuse"""

    def __init__(self, storage_path: Optional[str] = None):
        self.storage_path = storage_path or "/root/allegro/goap/plan_library.json"
        self.plans: Dict[str, List['Plan']] = {}  # goal_name -> list of plans
        self._load()

    def _load(self):
        """Load plan library from disk (best-effort; bad files are ignored)."""
        import os
        if os.path.exists(self.storage_path):
            try:
                with open(self.storage_path, 'r') as f:
                    json.load(f)
                # Plans would need proper deserialization from the loaded
                # dicts; until that exists, start with an empty library.
                self.plans = {}
            except (OSError, ValueError):
                # Narrowed from a bare `except:` (which also swallowed
                # KeyboardInterrupt/SystemExit). OSError: unreadable file;
                # ValueError: malformed JSON (json.JSONDecodeError subclass).
                self.plans = {}

    def save(self):
        """Save plan library to disk, creating the parent directory if needed."""
        import os
        parent = os.path.dirname(self.storage_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(self.storage_path, 'w') as f:
            json.dump({
                goal: [p.to_dict() for p in plans]
                for goal, plans in self.plans.items()
            }, f, indent=2)

    def add_plan(self, plan: 'Plan'):
        """Add a successful plan to the library and persist immediately."""
        goal_name = plan.goal.name
        self.plans.setdefault(goal_name, []).append(plan)
        self.save()

    def find_similar_plan(
        self,
        goal: 'Goal',
        world_state: Dict[str, Any]
    ) -> Optional['Plan']:
        """Find a similar successful plan.

        NOTE(review): world_state is currently unused -- similarity matching
        against the initial state is not implemented; the first COMPLETED
        plan recorded for this goal is returned.
        """
        if goal.name not in self.plans:
            return None

        for plan in self.plans[goal.name]:
            # Simple heuristic: any plan that achieved the goal
            if plan.status == PlanStatus.COMPLETED:
                return plan

        return None
# Singleton planner instance shared by the rest of the system.
planner: GOAPPlanner = GOAPPlanner()
if __name__ == "__main__":
    # Smoke-test the planner against a hand-built world state.
    print("=== GOAP Planner Module Test ===")

    from goals import SystemHealthGoal
    from actions import CheckSystemHealth, CleanupResources

    # Build the test goal and an initial world state.
    goal = SystemHealthGoal()
    world_state = {
        'system': {
            'cpu_percent': 45,
            'memory_percent': 60,
            'disk_percent': 85,  # High disk usage - needs cleanup
            'uptime_hours': 48,
            'recent_errors': 2,
            'health_checked': False,
        }
    }

    print(f"\nGoal: {goal.name}")
    print(f"Initial satisfaction: {goal.evaluate(world_state):.2f}")
    print(f"Desired state: {goal.get_desired_state()}")

    # Run the planner and time it.
    print("\n=== Planning ===")
    t0 = time.time()
    plan = planner.plan(goal, world_state)
    elapsed = time.time() - t0

    if not plan:
        print("No plan found")
    else:
        print(f"Plan found in {elapsed:.3f}s")
        print(f"Actions: {len(plan.actions)}")
        print(f"Estimated cost: {plan.estimated_cost}")
        for i, action in enumerate(plan.actions):
            print(f" {i+1}. {action.name}")

    print("\n=== Planner Stats ===")
    print(json.dumps(planner.get_stats(), indent=2))