turboquant/reports/test-matrix-2026-04-14.json

{
  "timestamp": "2026-04-15T02:07:45Z",
  "model": "qwen2.5:7b",
  "quality": {
    "total": 10,
    "passed": 10,
    "pass_rate": 1.0,
    "details": [
      {
        "id": 1,
        "category": "factual",
        "prompt": "What are the three laws of thermodynamics?",
        "pattern_matched": true,
        "tok_s": 53.0,
        "response_len": 1655
      },
      {
        "id": 2,
        "category": "code_generation",
        "prompt": "Write a Python function to merge two sorted lists into a single sorted list without using built-in s",
        "pattern_matched": true,
        "tok_s": 50.9,
        "response_len": 1801
      },
      {
        "id": 3,
        "category": "reasoning",
        "prompt": "If all A are B, and some B are C, what can we conclude about the relationship between A and C? Expla",
        "pattern_matched": true,
        "tok_s": 51.4,
        "response_len": 1787
      },
      {
        "id": 4,
        "category": "long_form_writing",
        "prompt": "Write a 500-word essay on the sovereignty of local AI. Discuss why local inference matters for priva",
        "pattern_matched": true,
        "tok_s": 52.6,
        "response_len": 3139
      },
      {
        "id": 5,
        "category": "summarization",
        "prompt": "Summarize the following passage in approximately 100 words:\n\nThe concept of artificial intelligence ",
        "pattern_matched": true,
        "tok_s": 54.2,
        "response_len": 664
      },
      {
        "id": 6,
        "category": "tool_call_format",
        "prompt": "Read the file at ~/SOUL.md and quote the prime directive. Format your response as a JSON object with",
        "pattern_matched": true,
        "tok_s": 53.9,
        "response_len": 374
      },
      {
        "id": 7,
        "category": "multi_turn_context",
        "prompt": "Remember this number: 7429. Simply acknowledge that you've received it.",
        "pattern_matched": true,
        "tok_s": 58.1,
        "response_len": 98
      },
      {
        "id": 8,
        "category": "math",
        "prompt": "What is 17 * 23 + 156 / 12? Show your work step by step.",
        "pattern_matched": true,
        "tok_s": 53.6,
        "response_len": 731
      },
      {
        "id": 9,
        "category": "creative",
        "prompt": "Write a haiku about a machine learning model that dreams.",
        "pattern_matched": true,
        "tok_s": 55.4,
        "response_len": 74
      },
      {
        "id": 10,
        "category": "instruction_following",
        "prompt": "List 5 programming languages. Number them. Bold the third one. Put the entire list in a code block.",
        "pattern_matched": true,
        "tok_s": 52.6,
        "response_len": 58
      }
    ]
  },
  "perplexity": {
    "corpus": "corpora/wiki.test.raw",
    "chunks_scored": 10,
    "avg_tok_s": 42.9,
    "note": "Proxy metric \u2014 real PPL requires logprob support",
    "passed": true
  },
  "needle_in_haystack": {
    "total": 3,
    "passed": 3,
    "details": {
      "8K": {
        "retrieved": true,
        "tok_s": 50.0,
        "response_excerpt": "The secret code in the text is clearly stated at the beginning: **TURBOQUANT-7742**.\n\nThis appears t"
      },
      "16K": {
        "retrieved": true,
        "tok_s": 40.5,
        "response_excerpt": "The secret code in the text is \"TURBOQUANT-7742\". This message is hidden within the repetitive phras"
      },
      "32K": {
        "retrieved": true,
        "tok_s": 38.7,
        "response_excerpt": "The secret code in the text is clearly stated as \"TURBOQUANT-7742\". This appears after a series of s"
      }
    }
  },
  "performance": {},
  "context_ceiling": {},
  "go_no_go": "NO-GO",
  "issues": [
    "Context ceiling: 0 < 64K required"
  ]
}