{
  "dataset": "glossary",
  "record": {
    "id": "ai-crawler",
    "term": "AI Crawler",
    "category": "core",
    "short_def": "An automated bot that fetches web content for an AI system — to train a model, build a search index, or answer a user's question in real time.",
    "long_def": "AI crawlers are classified by purpose (training vs search vs inference) and by behavior (whether they honor robots.txt). Because user-agent strings can be spoofed, a genuine crawler is confirmed via published IP ranges or reverse DNS — and increasingly via Web Bot Auth signatures.",
    "see_also": [
      "robots-txt",
      "web-bot-auth",
      "agent-identity"
    ],
    "etymology_origin": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://radar.cloudflare.com/ — 'AI crawler' is a descriptive category (training/search/inference bots) tracked by Cloudflare Radar and ai.robots.txt; no single coining authority"
    },
    "related_to": [
      "robots-txt",
      "web-bot-auth",
      "agent-identity",
      "agentic-web"
    ],
    "contrast_with": "Unlike a traditional search crawler such as classic Googlebot, an AI crawler fetches content to train models or to ground a live answer — and a growing share (inference fetchers) act per user query rather than on a scheduled index crawl.",
    "example": "Per Cloudflare Radar (May 2026), AI crawlers by crawl share included GPTBot (11.48%), Bytespider (10.25%), Applebot (7.01%) and the new Claude-SearchBot (2.22%).",
    "source": "https://radar.cloudflare.com/",
    "status": "active",
    "why_it_matters": "AI crawlers are the agents most sites encounter first; knowing which crawler is which — and verifying that it is what it claims to be — is the entry point to every access, licensing and citation decision.",
    "sameAs": [
      "https://en.wikipedia.org/wiki/Web_crawler"
    ],
    "bridge_entity": "crawlers",
    "last_verified": "2026-06-15",
    "md_twin": "/glossary/ai-crawler.md"
  }
}