{
  "dataset": "crawlers",
  "record": {
    "id": "icc-crawler",
    "name": "ICC-Crawler",
    "operator": "NICT (National Institute of Information and Communications Technology)",
    "purpose": "training",
    "ua_substring": "ICC-Crawler",
    "robots_token": "ICC-Crawler",
    "respects_robots": true,
    "verify": "verify by user-agent + edge controls; the ai.robots.txt registry records respects-robots = Yes. No operator-published IP-range file confirmed.",
    "notes": "Crawls data to train and support AI technologies; NICT (Japan) uses the collected data for AI and may provide it to third parties, including commercial companies. Token and operator recorded in the ai.robots.txt machine-readable registry.",
    "canonical_name": "ICC-Crawler",
    "user_agent_token": "ICC-Crawler",
    "ua_full": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json"
    },
    "bot_type": "training",
    "bot_type_extension": null,
    "opt_out_mechanism": "robots.txt disallow (User-agent: ICC-Crawler)",
    "published_ip_range_url": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json",
      "note": "no authoritative NICT-published IP-range JSON confirmed at primary source"
    },
    "asn": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json"
    },
    "reverse_dns_suffix": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json"
    },
    "supports_web_bot_auth": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json"
    },
    "signature_agent_domain": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json"
    },
    "jwks_url": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json"
    },
    "verification_methods": [
      "user-agent-match"
    ],
    "crawl_traffic_share": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://radar.cloudflare.com/bots"
    },
    "targeted_content_type": "HTML, text",
    "documentation_url": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json",
      "note": "no dedicated NICT operator documentation page confirmed; token/operator/respects sourced from the ai.robots.txt registry"
    },
    "first_seen_date": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json"
    },
    "last_verified_date": "2026-06-15",
    "block_vs_allow_recommendation": "conditional — research/training crawler that may share collected data with third parties, including commercial companies; allow it if you want your content represented in the collected AI training data, or block it via robots.txt to opt out. No direct referral.",
    "citation_referral_value": "low (training/data collection; no direct citation or referral)",
    "cloudflare_verified_category": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://radar.cloudflare.com/bots/directory/icc-crawler"
    },
    "status": "active",
    "triples": [
      [
        "ICC-Crawler",
        "operated_by",
        "NICT"
      ],
      [
        "ICC-Crawler",
        "has_bot_type",
        "training"
      ],
      [
        "ICC-Crawler",
        "verified_via",
        "user-agent-match"
      ]
    ],
    "attribute_sources": [
      {
        "claims": [
          "user_agent_token",
          "robots_token",
          "operator",
          "respects_robots",
          "purpose"
        ],
        "source": "https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.json",
        "last_verified": "2026-06-15"
      }
    ]
  }
}