{
  "dataset": "crawlers",
  "record": {
    "id": "meta-externalagent",
    "name": "Meta-ExternalAgent",
    "operator": "Meta",
    "purpose": "training",
    "ua_substring": "meta-externalagent",
    "robots_token": "meta-externalagent",
    "respects_robots": true,
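    "verify": "Match the user-agent token; no dedicated authoritative IP-range file was confirmed at the primary source, so confirm that requests originate from Meta/Facebook IP space",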
    "notes": "Crawls content to train Meta's Llama models and AI products.",
    "canonical_name": "Meta-ExternalAgent",
    "user_agent_token": "meta-externalagent",
    "ua_full": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler",
      "note": "The full UA string is reported across secondary sources and matches Meta's documented crawler URL; value is left null until the exact string and version are confirmed against the primary source at build"
    },
    "bot_type": "training",
    "bot_type_extension": null,
    "opt_out_mechanism": "robots.txt disallow (User-agent: meta-externalagent)",
    "published_ip_range_url": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler",
      "note": "Meta documents the crawler but no dedicated authoritative IP-range JSON file was confirmed at primary source; requests come from Meta/Facebook IP space"
    },
    "asn": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
    },
    "reverse_dns_suffix": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
    },
    "supports_web_bot_auth": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
    },
    "signature_agent_domain": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
    },
    "jwks_url": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
    },
    "verification_methods": [
      "user-agent-match"
    ],
    "crawl_traffic_share": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://radar.cloudflare.com/bots"
    },
    "targeted_content_type": "HTML, text",
    "documentation_url": "https://developers.facebook.com/docs/sharing/webmasters/crawler",
    "first_seen_date": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
    },
    "last_verified_date": "2026-06-15",
    "block_vs_allow_recommendation": "conditional — training crawler for Llama/Meta AI; allow it if you want your content represented in Meta's models, or disallow it via robots.txt to opt out. The lack of an authoritative IP file makes strict verification harder; rate-limit at the edge if needed.",
    "citation_referral_value": "low (training; does not itself cite or refer)",
    "cloudflare_verified_category": {
      "value": null,
      "verify_status": "verify-against-primary-at-build",
      "source_hint": "https://radar.cloudflare.com/bots/directory/meta-externalagent"
    },
    "status": "active",
    "triples": [
      [
        "Meta-ExternalAgent",
        "operated_by",
        "Meta"
      ],
      [
        "Meta-ExternalAgent",
        "has_bot_type",
        "training"
      ],
      [
        "Meta-ExternalAgent",
        "verified_via",
        "user-agent-match"
      ]
    ],
    "attribute_sources": [
      {
        "claims": [
          "robots_token",
          "documentation_url",
          "respects_robots"
        ],
        "source": "https://developers.facebook.com/docs/sharing/webmasters/crawler",
        "last_verified": "2026-06-15"
      }
    ]
  }
}