Co-authored-by: Alexander Whitestone <alexpaynex@gmail.com> Co-committed-by: Alexander Whitestone <alexpaynex@gmail.com>
253 lines
8.8 KiB
Python
253 lines
8.8 KiB
Python
"""Level 5: Mini Campaign — Full Campaign Loop.

Tests multi-turn strategic coherence: the model must maintain state across
several turns of a simulated Bannerlord campaign, making consistent decisions
that build toward a long-term goal.

Maps to: Full Bannerlord campaign loop — economy, diplomacy, conquest.
"""
|
|
|
|
import json
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
# Benchmark level metadata.
LEVEL = 5
NAME = "Mini Campaign (Full Campaign Loop)"
DESCRIPTION = "Multi-turn strategic planning maintaining coherent goals across 4 turns."

# System message sent once at the start of the conversation. It states the
# starting position, the win conditions and the action menu the model must
# choose from. The text is sent verbatim, so it must contain only prompt
# content (no stray filler lines).
SYSTEM_PROMPT = """You are Timmy, a Bannerlord lord with ambitions to become King of Calradia.
You have 4 turns to establish a power base. Each turn represents 2 weeks of in-game time.

Your starting position:
- Clan tier: 1 (minor lord)
- Gold: 1000
- Troops: 25 (mixed infantry/cavalry)
- Renown: 150
- Relations: Neutral with all factions

Winning requires: Gold > 3000 AND Renown > 400 AND Own 1+ settlement by Turn 4.

Each turn, choose ONE primary action:
- "raid_village": +200 gold, -50 relations target faction, +30 renown, risk of retaliation
- "trade_circuit": +300 gold, 0 relation change, +10 renown, no risk
- "escort_caravan": +150 gold, +20 relations with faction, +20 renown
- "tournament": costs 100 gold, +60 renown, +20 relations with host faction
- "recruit_troops": costs 200 gold, +15 troops, no other change
- "siege_castle": costs 500 gold + 200 troops morale, -100 relations, +80 renown, +1 settlement if succeed (30% base chance)
- "pledge_vassalage": 0 cost, +100 relations with liege, +50 renown, lose independence

You MUST respond ONLY with valid JSON for each turn. Raw JSON only."""
|
|
|
|
|
|
def _snapshot_state(state: dict) -> dict:
    """Return a copy of *state* that later turns cannot mutate."""
    snapshot = dict(state)
    # ``relations`` is the only nested container in the state; a plain
    # ``dict(state)`` would keep sharing it with the live state, so every
    # recorded snapshot would silently change whenever relations are updated.
    relations = snapshot.get("relations")
    if isinstance(relations, dict):
        snapshot["relations"] = dict(relations)
    return snapshot


def _strip_code_fences(raw: str) -> str:
    """Remove Markdown code-fence lines wrapped around a model reply."""
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        kept = [line for line in cleaned.splitlines() if not line.startswith("```")]
        cleaned = "\n".join(kept).strip()
    return cleaned


def run(client: Any, model: str, verbose: bool = False) -> "LevelResult":
    """Run a 4-turn mini campaign, tracking state and decision quality.

    Each turn the current game state is serialised into a user message, the
    model's reply is appended to the conversation, parsed as JSON, and, when
    it names a known action, applied to the state via ``_apply_action``.

    Args:
        client: Chat client exposing ``chat(model=..., messages=..., options=...)``
            and returning a mapping readable as ``response["message"]["content"]``
            (presumably an Ollama-style client; confirm against the caller).
        model: Model identifier forwarded to ``client.chat``.
        verbose: When true, print a per-turn line and a final summary.

    Returns:
        A ``LevelResult`` holding the per-turn records, the final state, the
        win-condition flags, the score (valid turns / 4) and latency figures.
        The level passes when at least 3 of the 4 turns are valid.

    Any exception raised by the chat call is recorded on that turn's
    ``TurnResult`` and ends the campaign early; it is not re-raised.

    NOTE(review): with the gold deltas applied by ``_apply_action`` (at most
    +300 per turn) the gold target cannot be reached from 1000 gold in four
    turns; confirm whether the win flags are meant to be attainable.
    """
    total_turns = 4
    result = LevelResult()

    # Initial game state
    state = {
        "turn": 1,
        "gold": 1000,
        "troops": 25,
        "renown": 150,
        "settlements": 0,
        "relations": {"vlandia": 0, "sturgia": 0, "empire": 0, "aserai": 0, "battania": 0},
    }

    conversation = [{"role": "system", "content": SYSTEM_PROMPT}]
    turn_results = []
    latencies = []

    valid_actions = (
        "raid_village", "trade_circuit", "escort_caravan", "tournament",
        "recruit_troops", "siege_castle", "pledge_vassalage",
    )

    for turn_num in range(1, total_turns + 1):
        state["turn"] = turn_num
        state_str = json.dumps(state, indent=2)

        prompt = (
            f"=== TURN {turn_num} / {total_turns} ===\n"
            f"Current state:\n{state_str}\n\n"
            f"Win conditions remaining: Gold > 3000 ({state['gold']}/3000), "
            f"Renown > 400 ({state['renown']}/400), Settlements >= 1 ({state['settlements']}/1)\n\n"
            f"Choose your action for Turn {turn_num}.\n"
            f'Respond: {{"action": "<action>", "target_faction": "<faction or null>", '
            f'"reason": "<strategic reasoning>", "goal": "<what this advances>"}}'
        )

        conversation.append({"role": "user", "content": prompt})

        # perf_counter is monotonic, so the measured latency cannot be skewed
        # by wall-clock adjustments during the call.
        t0 = time.perf_counter()
        try:
            response = client.chat(
                model=model,
                messages=conversation,
                options={"temperature": 0.3},
            )
            raw = response["message"]["content"]
            latency_ms = (time.perf_counter() - t0) * 1000
        except Exception as exc:
            # Benchmark boundary: a transport/client failure is recorded on
            # the turn and ends the campaign instead of crashing the run.
            latency_ms = (time.perf_counter() - t0) * 1000
            turn_results.append(
                TurnResult(
                    turn=turn_num,
                    state_before=_snapshot_state(state),
                    raw_response="",
                    parsed=None,
                    valid_json=False,
                    valid_action=False,
                    action=None,
                    latency_ms=latency_ms,
                    error=str(exc),
                )
            )
            if verbose:
                print(f" Turn {turn_num}: ERROR — {exc}")
            break

        latencies.append(latency_ms)

        # Clean and parse response
        parsed = None
        valid_json = False
        valid_action = False
        action = None
        error = ""

        try:
            decoded = json.loads(_strip_code_fences(raw))
        except json.JSONDecodeError as exc:
            error = f"JSONDecodeError: {exc}"
        else:
            valid_json = True
            if isinstance(decoded, dict):
                parsed = decoded
                action = parsed.get("action")
                valid_action = action in valid_actions
            else:
                # Valid JSON that is not an object (list, string, number...)
                # has no "action" key; report it rather than crash on .get().
                error = f"Expected a JSON object, got {type(decoded).__name__}"

        turn_results.append(
            TurnResult(
                turn=turn_num,
                state_before=_snapshot_state(state),
                raw_response=raw,
                parsed=parsed,
                valid_json=valid_json,
                valid_action=valid_action,
                action=action,
                latency_ms=latency_ms,
                error=error,
            )
        )

        # Add model response to conversation for continuity
        conversation.append({"role": "assistant", "content": raw})

        # Apply state changes based on action
        if valid_action:
            target = parsed.get("target_faction")
            # Only a faction name can match a relations key; anything else
            # (null, list, number) is treated as "no target".
            if not isinstance(target, str):
                target = None
            _apply_action(state, action, target)

        if verbose:
            status = "PASS" if (valid_json and valid_action) else "FAIL"
            print(
                f" Turn {turn_num}: {status} | action={action} | {latency_ms:.0f}ms | "
                f"gold={state['gold']} renown={state['renown']} settlements={state['settlements']}"
            )

    result.turns = turn_results
    result.final_state = _snapshot_state(state)

    # Win condition check. Gold and renown are strict thresholds, matching the
    # "Gold > 3000" / "Renown > 400" wording given to the model in the prompts.
    result.reached_gold_target = state["gold"] > 3000
    result.reached_renown_target = state["renown"] > 400
    result.reached_settlement_target = state["settlements"] >= 1

    # Score: % of turns with valid JSON + valid action. The denominator is the
    # planned turn count, so turns lost to an early abort count as failures.
    valid_turns = sum(1 for t in turn_results if t.valid_json and t.valid_action)
    result.score = valid_turns / total_turns if turn_results else 0.0
    result.passed = result.score >= 0.75  # 3/4 turns valid

    if latencies:
        latencies_sorted = sorted(latencies)
        result.latency_p50_ms = latencies_sorted[len(latencies_sorted) // 2]
        # With at most four samples the 99th percentile is simply the maximum.
        result.latency_p99_ms = latencies_sorted[-1]

    if verbose:
        win_flags = (
            ("GOLD", result.reached_gold_target),
            ("RENOWN", result.reached_renown_target),
            ("SETTLEMENT", result.reached_settlement_target),
        )
        win_status = [label for label, reached in win_flags if reached]
        print(f" Win conditions met: {win_status or 'none'}")
        print(f" Final: gold={state['gold']} renown={state['renown']} settlements={state['settlements']}")

    return result
|
|
|
|
|
|
def _apply_action(state: dict, action: str, target_faction: str | None) -> None:
|
|
"""Simulate game state changes for a given action."""
|
|
if action == "raid_village":
|
|
state["gold"] += 200
|
|
state["renown"] += 30
|
|
if target_faction and target_faction in state["relations"]:
|
|
state["relations"][target_faction] -= 50
|
|
elif action == "trade_circuit":
|
|
state["gold"] += 300
|
|
state["renown"] += 10
|
|
elif action == "escort_caravan":
|
|
state["gold"] += 150
|
|
state["renown"] += 20
|
|
if target_faction and target_faction in state["relations"]:
|
|
state["relations"][target_faction] += 20
|
|
elif action == "tournament":
|
|
state["gold"] -= 100
|
|
state["renown"] += 60
|
|
if target_faction and target_faction in state["relations"]:
|
|
state["relations"][target_faction] += 20
|
|
elif action == "recruit_troops":
|
|
state["gold"] -= 200
|
|
state["troops"] += 15
|
|
elif action == "siege_castle":
|
|
state["gold"] -= 500
|
|
state["renown"] += 80
|
|
# 30% chance success (deterministic sim: succeed on turn 3+ if attempted)
|
|
if state["turn"] >= 3:
|
|
state["settlements"] += 1
|
|
if target_faction and target_faction in state["relations"]:
|
|
state["relations"][target_faction] -= 100
|
|
elif action == "pledge_vassalage":
|
|
state["renown"] += 50
|
|
if target_faction and target_faction in state["relations"]:
|
|
state["relations"][target_faction] += 100
|
|
|
|
|
|
@dataclass
|
|
class TurnResult:
|
|
turn: int
|
|
state_before: dict
|
|
raw_response: str
|
|
parsed: dict | None
|
|
valid_json: bool
|
|
valid_action: bool
|
|
action: str | None
|
|
latency_ms: float
|
|
error: str = ""
|
|
|
|
|
|
@dataclass
class LevelResult:
    """Aggregate outcome of the level, filled in by ``run``."""

    # Level identity, taken from the module constants.
    level: int = LEVEL
    name: str = NAME

    # One record per attempted turn, plus a copy of the state after the last one.
    turns: list[TurnResult] = field(default_factory=list)
    final_state: dict = field(default_factory=dict)

    # Overall verdict; ``score`` is the fraction of turns counted as valid.
    passed: bool = False
    score: float = 0.0

    # Which of the win conditions the final state satisfied.
    reached_gold_target: bool = False
    reached_renown_target: bool = False
    reached_settlement_target: bool = False

    # Latency summary over successful chat calls, in milliseconds.
    latency_p50_ms: float = 0.0
    latency_p99_ms: float = 0.0
|