{
  "dataset": "crawlers",
  "record": {
    "id": "ccbot",
    "name": "CCBot",
    "operator": "Common Crawl",
    "purpose": "training",
    "ua_substring": "CCBot",
    "robots_token": "CCBot",
    "respects_robots": true,
    "verify": "Common Crawl publishes its crawler IP ranges",
    "notes": "Builds the open Common Crawl corpus that many model trainers ingest downstream. Blocking CCBot blocks an upstream training-data source for the whole ecosystem.",
    "canonical_name": "CCBot",
    "user_agent_token": "CCBot",
    "ua_full": {
      "value": "CCBot/2.0 (https://commoncrawl.org/faq/)",
      "source": "https://commoncrawl.org/ccbot",
      "last_verified": "2026-06-15"
    },
    "bot_type": "training",
    "bot_type_extension": null,
    "opt_out_mechanism": "robots.txt disallow (User-agent: CCBot)",
    "published_ip_range_url": "https://index.commoncrawl.org/ccbot.json",
    "asn": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://index.commoncrawl.org/ccbot.json"
    },
    "reverse_dns_suffix": {
      "value": ".crawl.commoncrawl.org",
      "source": "https://commoncrawl.org/ccbot",
      "last_verified": "2026-06-15"
    },
    "supports_web_bot_auth": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://commoncrawl.org/ccbot"
    },
    "signature_agent_domain": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://commoncrawl.org/ccbot"
    },
    "jwks_url": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://commoncrawl.org/ccbot"
    },
    "verification_methods": [
      "published-IP-range",
      "reverse-DNS"
    ],
    "crawl_traffic_share": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://radar.cloudflare.com/bots"
    },
    "targeted_content_type": "HTML, text",
    "documentation_url": "https://commoncrawl.org/ccbot",
    "first_seen_date": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://commoncrawl.org/ccbot"
    },
    "last_verified_date": "2026-06-15",
    "block_vs_allow_recommendation": "conditional — upstream open-corpus crawler; allowing it feeds many downstream trainers (broad reach), blocking removes you from the Common Crawl corpus. No direct referral.",
    "citation_referral_value": "low (open training corpus; no direct citation or referral)",
    "cloudflare_verified_category": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://radar.cloudflare.com/bots/directory/ccbot"
    },
    "status": "active",
    "triples": [
      [
        "CCBot",
        "operated_by",
        "Common Crawl"
      ],
      [
        "CCBot",
        "has_bot_type",
        "training"
      ],
      [
        "CCBot",
        "verified_via",
        "published-IP-range"
      ],
      [
        "CCBot",
        "verified_via",
        "reverse-DNS"
      ]
    ],
    "attribute_sources": [
      {
        "claims": [
          "ua_full",
          "user_agent_token",
          "robots_token",
          "published_ip_range_url",
          "reverse_dns_suffix",
          "documentation_url"
        ],
        "source": "https://commoncrawl.org/ccbot",
        "last_verified": "2026-06-15"
      }
    ]
  }
}