{
  "dataset": "glossary",
  "record": {
    "id": "vector-database",
    "term": "Vector Database",
    "category": "knowledge-memory",
    "short_def": "A database built to store embeddings and retrieve those nearest to a query vector using approximate nearest-neighbor search.",
    "long_def": "A vector database (or vector store) indexes high-dimensional embeddings and finds the most similar ones with Approximate Nearest Neighbor (ANN) algorithms — commonly HNSW graphs or quantization — under metrics like cosine distance. It is the storage-and-retrieval backbone of RAG: documents go in as vectors, a query vector comes in, and the closest chunks come out as context.",
    "see_also": [
      "embeddings",
      "rag",
      "agentic-rag"
    ],
    "etymology_origin": "A database category that emerged with the rise of embedding-based retrieval; popularized by tools such as the FAISS library, Pinecone, Weaviate, Milvus, and the pgvector extension for PostgreSQL, which rely on ANN indexes like HNSW.",
    "related_to": [
      "embeddings",
      "rag",
      "agentic-rag",
      "grounding"
    ],
    "contrast_with": "Unlike a relational database, which typically retrieves rows by exact matches on field values, a vector database retrieves items by the distance between embeddings, found through approximate nearest-neighbor search — similarity ranking rather than exact lookup.",
    "example": "Vector databases such as Pinecone and Weaviate run Approximate Nearest Neighbor search over embeddings (Weaviate's default index is HNSW) to return the documents most semantically similar to a query.",
    "source": "https://en.wikipedia.org/wiki/Vector_database",
    "status": "active",
    "why_it_matters": "The vector database is where your content lives once an AI system ingests it; content that chunks and embeds cleanly is retrieved more accurately as context for the answers agents generate.",
    "sameAs": [
      "https://en.wikipedia.org/wiki/Vector_database"
    ],
    "bridge_entity": "models",
    "last_verified": "2026-06-15",
    "md_twin": "/glossary/vector-database.md"
  }
}