{
  "dataset": "glossary",
  "record": {
    "id": "prompt-caching",
    "term": "Prompt Caching",
    "category": "knowledge-memory",
    "short_def": "Reusing the model's processed state for a repeated prompt prefix so identical leading context is not recomputed, cutting latency and cost.",
    "long_def": "Introduced by Anthropic on 14 August 2024, prompt caching marks a content block as a cache breakpoint; a later request that begins with the same exact bytes reads the cached state instead of reprocessing it. Cached input typically costs a fraction of normal input tokens (with a one-time write surcharge). It rewards stable, front-loaded context — a direct incentive to put durable, machine-readable material first.",
    "see_also": [
      "token-economics",
      "context-engineering",
      "agent-skills"
    ],
    "etymology_origin": "Introduced by Anthropic for the Claude API on 14 August 2024; caches a prompt prefix marked as a breakpoint, with an ephemeral (short-TTL) cache reused across subsequent requests.",
    "related_to": [
      "token-economics",
      "context-engineering",
      "agent-skills",
      "agentic-loop"
    ],
    "contrast_with": "Unlike RAG, which fetches new context per query, prompt caching reuses unchanged leading context across calls — it optimizes repeated identical input rather than retrieving fresh information.",
    "example": "Anthropic launched prompt caching on 14 August 2024; cached tokens cost a fraction of normal input tokens, rewarding stable, repeated prompt prefixes.",
    "source": "https://claude.com/blog/prompt-caching",
    "status": "active",
    "why_it_matters": "Prompt caching makes stable, front-loaded, machine-readable context cheaper to reuse across an agent's loop — another economic reason clean structure beats sprawling markup.",
    "sameAs": [],
    "bridge_entity": "agent-readiness",
    "last_verified": "2026-06-15",
    "md_twin": "/glossary/prompt-caching.md"
  }
}