{
  "dataset": "glossary",
  "record": {
    "id": "rag",
    "term": "Retrieval-Augmented Generation (RAG)",
    "category": "knowledge-memory",
    "short_def": "Fetching relevant documents at query time and giving them to the model as context, so answers are grounded in current, specific data.",
    "long_def": "The term was coined by Patrick Lewis and colleagues at Facebook AI Research (now Meta AI), University College London and NYU in a 2020 NeurIPS paper. RAG reduces hallucination and lets a model answer questions about information it was never trained on. Agent-friendly sites help RAG by exposing clean, chunkable content (markdown twins, llms.txt) and structured data.",
    "see_also": [
      "grounding",
      "markdown-twin"
    ],
    "etymology_origin": "Coined by Patrick Lewis et al. in 'Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks' (Facebook AI Research / UCL / NYU), arXiv 2005.11401, May 2020; presented at NeurIPS 2020.",
    "related_to": [
      "grounding",
      "markdown-twin",
      "json-ld"
    ],
    "contrast_with": "Unlike fine-tuning, which bakes knowledge into model weights, RAG fetches documents at query time and supplies them as context — knowledge stays external, current and citable.",
    "example": "The seminal 2020 RAG paper (Lewis et al., arXiv 2005.11401) combined a Dense Passage Retrieval (DPR) retriever with a BART generator and set a new state of the art on three open-domain QA benchmarks.",
    "source": "https://arxiv.org/abs/2005.11401",
    "status": "active",
    "why_it_matters": "RAG is how most AI answer engines ground responses; sites that expose clean, retrievable content get pulled into and cited by those answers.",
    "sameAs": [
      "https://en.wikipedia.org/wiki/Retrieval-augmented_generation"
    ],
    "bridge_entity": "models",
    "last_verified": "2026-06-15",
    "md_twin": "/glossary/rag.md"
  }
}