{
  "dataset": "glossary",
  "record": {
    "id": "embeddings",
    "term": "Embeddings",
    "category": "knowledge-memory",
    "short_def": "Dense numeric vectors that represent text (or other data) so that semantically similar items sit close together in vector space.",
    "long_def": "The modern approach was crystallized by Word2vec (Mikolov, Chen, Corrado and Dean at Google, 2013), which learned high-quality dense word vectors that captured meaning by context. Today, embedding models turn documents and queries into vectors so similarity search can find relevant content by meaning rather than keyword — the retrieval engine under RAG and vector databases.",
    "see_also": [
      "vector-database",
      "rag",
      "agentic-rag"
    ],
    "etymology_origin": "The dense-vector approach was established by Word2vec, introduced by Tomáš Mikolov, Kai Chen, Greg Corrado and Jeffrey Dean at Google in 2013 (arXiv 1301.3781); the broader 'word embedding' concept predates it.",
    "related_to": [
      "vector-database",
      "rag",
      "agentic-rag",
      "grounding"
    ],
    "contrast_with": "Unlike keyword indexing, which matches exact tokens, embeddings place text in a continuous vector space so 'meaning-similar' content is found by vector distance — semantic match rather than lexical match.",
    "example": "Word2vec (Mikolov et al., Google, 2013) showed dense word vectors capture semantics so well that vector arithmetic like 'king − man + woman ≈ queen' holds.",
    "source": "https://en.wikipedia.org/wiki/Word_embedding",
    "status": "active",
    "why_it_matters": "Embeddings are how agents retrieve your content by meaning; clean, well-structured text embeds and retrieves better, making your site easier to surface in RAG answers.",
    "sameAs": [
      "https://en.wikipedia.org/wiki/Word_embedding",
      "https://en.wikipedia.org/wiki/Word2vec"
    ],
    "bridge_entity": "models",
    "last_verified": "2026-06-15",
    "md_twin": "/glossary/embeddings.md"
  }
}