diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py
index 113fe3dd7..680233b0f 100644
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -246,6 +246,169 @@ class TestDelegateTask(unittest.TestCase):
         self.assertEqual(kwargs["api_mode"], parent.api_mode)
 
 
+class TestDelegateObservability(unittest.TestCase):
+    """Tests for enriched metadata returned by _run_single_child."""
+
+    def test_observability_fields_present(self):
+        """Completed child should return tool_trace, tokens, model, exit_reason."""
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.model = "claude-sonnet-4-6"
+            mock_child.session_prompt_tokens = 5000
+            mock_child.session_completion_tokens = 1200
+            mock_child.run_conversation.return_value = {
+                "final_response": "done",
+                "completed": True,
+                "interrupted": False,
+                "api_calls": 3,
+                "messages": [
+                    {"role": "user", "content": "do something"},
+                    {"role": "assistant", "tool_calls": [
+                        {"id": "tc_1", "function": {"name": "web_search", "arguments": '{"query": "test"}'}}
+                    ]},
+                    {"role": "tool", "tool_call_id": "tc_1", "content": '{"results": [1,2,3]}'},
+                    {"role": "assistant", "content": "done"},
+                ],
+            }
+            MockAgent.return_value = mock_child
+
+            result = json.loads(delegate_task(goal="Test observability", parent_agent=parent))
+            entry = result["results"][0]
+
+            # Core observability fields
+            self.assertEqual(entry["model"], "claude-sonnet-4-6")
+            self.assertEqual(entry["exit_reason"], "completed")
+            self.assertEqual(entry["tokens"]["input"], 5000)
+            self.assertEqual(entry["tokens"]["output"], 1200)
+
+            # Tool trace
+            self.assertEqual(len(entry["tool_trace"]), 1)
+            self.assertEqual(entry["tool_trace"][0]["tool"], "web_search")
+            self.assertIn("args_bytes", entry["tool_trace"][0])
+            self.assertIn("result_bytes", entry["tool_trace"][0])
+            self.assertEqual(entry["tool_trace"][0]["status"], "ok")
+
+    def test_tool_trace_detects_error(self):
+        """Tool results containing 'error' should be marked as error status."""
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.model = "claude-sonnet-4-6"
+            mock_child.session_prompt_tokens = 0
+            mock_child.session_completion_tokens = 0
+            mock_child.run_conversation.return_value = {
+                "final_response": "failed",
+                "completed": True,
+                "interrupted": False,
+                "api_calls": 1,
+                "messages": [
+                    {"role": "assistant", "tool_calls": [
+                        {"id": "tc_1", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}}
+                    ]},
+                    {"role": "tool", "tool_call_id": "tc_1", "content": "Error: command not found"},
+                ],
+            }
+            MockAgent.return_value = mock_child
+
+            result = json.loads(delegate_task(goal="Test error trace", parent_agent=parent))
+            trace = result["results"][0]["tool_trace"]
+            self.assertEqual(trace[0]["status"], "error")
+
+    def test_parallel_tool_calls_paired_correctly(self):
+        """Parallel tool calls should each get their own result via tool_call_id matching."""
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.model = "claude-sonnet-4-6"
+            mock_child.session_prompt_tokens = 3000
+            mock_child.session_completion_tokens = 800
+            mock_child.run_conversation.return_value = {
+                "final_response": "done",
+                "completed": True,
+                "interrupted": False,
+                "api_calls": 1,
+                "messages": [
+                    {"role": "assistant", "tool_calls": [
+                        {"id": "tc_a", "function": {"name": "web_search", "arguments": '{"q": "a"}'}},
+                        {"id": "tc_b", "function": {"name": "web_search", "arguments": '{"q": "b"}'}},
+                        {"id": "tc_c", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}},
+                    ]},
+                    {"role": "tool", "tool_call_id": "tc_a", "content": '{"ok": true}'},
+                    {"role": "tool", "tool_call_id": "tc_b", "content": "Error: rate limited"},
+                    {"role": "tool", "tool_call_id": "tc_c", "content": "file1.txt\nfile2.txt"},
+                    {"role": "assistant", "content": "done"},
+                ],
+            }
+            MockAgent.return_value = mock_child
+
+            result = json.loads(delegate_task(goal="Test parallel", parent_agent=parent))
+            trace = result["results"][0]["tool_trace"]
+
+            # All three tool calls should have results
+            self.assertEqual(len(trace), 3)
+
+            # First: web_search → ok
+            self.assertEqual(trace[0]["tool"], "web_search")
+            self.assertEqual(trace[0]["status"], "ok")
+            self.assertIn("result_bytes", trace[0])
+
+            # Second: web_search → error
+            self.assertEqual(trace[1]["tool"], "web_search")
+            self.assertEqual(trace[1]["status"], "error")
+            self.assertIn("result_bytes", trace[1])
+
+            # Third: terminal → ok
+            self.assertEqual(trace[2]["tool"], "terminal")
+            self.assertEqual(trace[2]["status"], "ok")
+            self.assertIn("result_bytes", trace[2])
+
+    def test_exit_reason_interrupted(self):
+        """Interrupted child should report exit_reason='interrupted'."""
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.model = "claude-sonnet-4-6"
+            mock_child.session_prompt_tokens = 0
+            mock_child.session_completion_tokens = 0
+            mock_child.run_conversation.return_value = {
+                "final_response": "",
+                "completed": False,
+                "interrupted": True,
+                "api_calls": 2,
+                "messages": [],
+            }
+            MockAgent.return_value = mock_child
+
+            result = json.loads(delegate_task(goal="Test interrupt", parent_agent=parent))
+            self.assertEqual(result["results"][0]["exit_reason"], "interrupted")
+
+    def test_exit_reason_max_iterations(self):
+        """Child that didn't complete and wasn't interrupted hit max_iterations."""
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.model = "claude-sonnet-4-6"
+            mock_child.session_prompt_tokens = 0
+            mock_child.session_completion_tokens = 0
+            mock_child.run_conversation.return_value = {
+                "final_response": "",
+                "completed": False,
+                "interrupted": False,
+                "api_calls": 50,
+                "messages": [],
+            }
+            MockAgent.return_value = mock_child
+
+            result = json.loads(delegate_task(goal="Test max iter", parent_agent=parent))
+            self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")
+
+
 class TestBlockedTools(unittest.TestCase):
     def test_blocked_tools_constant(self):
         for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]:
diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index 8ade49fe0..76026be59 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -276,12 +276,70 @@
     else:
         status = "failed"
 
+    # Build tool trace from conversation messages (already in memory).
+    # Uses tool_call_id to correctly pair parallel tool calls with results.
+    tool_trace: list[Dict[str, Any]] = []
+    trace_by_id: Dict[str, Dict[str, Any]] = {}
+    messages = result.get("messages") or []
+    if isinstance(messages, list):
+        for msg in messages:
+            if not isinstance(msg, dict):
+                continue
+            if msg.get("role") == "assistant":
+                for tc in (msg.get("tool_calls") or []):
+                    fn = tc.get("function", {})
+                    entry_t = {
+                        "tool": fn.get("name", "unknown"),
+                        "args_bytes": len(fn.get("arguments", "")),
+                    }
+                    tool_trace.append(entry_t)
+                    tc_id = tc.get("id")
+                    if tc_id:
+                        trace_by_id[tc_id] = entry_t
+            elif msg.get("role") == "tool":
+                content = msg.get("content", "")
+                is_error = bool(
+                    content and "error" in content[:80].lower()
+                )
+                result_meta = {
+                    "result_bytes": len(content),
+                    "status": "error" if is_error else "ok",
+                }
+                # Match by tool_call_id for parallel calls
+                tc_id = msg.get("tool_call_id")
+                target = trace_by_id.get(tc_id) if tc_id else None
+                if target is not None:
+                    target.update(result_meta)
+                elif tool_trace:
+                    # Fallback for messages without tool_call_id
+                    tool_trace[-1].update(result_meta)
+
+    # Determine exit reason
+    if interrupted:
+        exit_reason = "interrupted"
+    elif completed:
+        exit_reason = "completed"
+    else:
+        exit_reason = "max_iterations"
+
+    # Extract token counts (safe for mock objects)
+    _input_tokens = getattr(child, "session_prompt_tokens", 0)
+    _output_tokens = getattr(child, "session_completion_tokens", 0)
+    _model = getattr(child, "model", None)
+
     entry: Dict[str, Any] = {
         "task_index": task_index,
         "status": status,
         "summary": summary,
         "api_calls": api_calls,
         "duration_seconds": duration,
+        "model": _model if isinstance(_model, str) else None,
+        "exit_reason": exit_reason,
+        "tokens": {
+            "input": _input_tokens if isinstance(_input_tokens, (int, float)) else 0,
+            "output": _output_tokens if isinstance(_output_tokens, (int, float)) else 0,
+        },
+        "tool_trace": tool_trace,
     }
     if status == "failed":
         entry["error"] = result.get("error", "Subagent did not produce a response.")