# The Britographer — robots.txt
# Last updated: 2026-04-28

# Default policy: allow all standard search crawlers
# Fallback group for crawlers that have no dedicated group in this file.
# Exclusions are listed before the catch-all Allow so that parsers which stop
# at the first matching rule still honour them; longest-match parsers
# (RFC 9309) give the same result either way.
User-agent: *
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# Sitemap
Sitemap: https://www.thebritographer.co.uk/sitemap.xml

# LLM-readable summary
# https://www.thebritographer.co.uk/llms.txt

# ─── AI / LLM crawlers — explicit allow ──────────────────────────────────
# The Britographer welcomes citation in AI search and answer engines.
# Authorship attribution and link to source post requested but not enforced.

# Exclusions come before the catch-all Allow so first-match parsers honour them.
User-agent: GPTBot
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: ChatGPT-User
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: OAI-SearchBot
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# Exclusions come before the catch-all Allow so first-match parsers honour them.
User-agent: ClaudeBot
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: anthropic-ai
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: Claude-Web
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# Exclusions come before the catch-all Allow so first-match parsers honour them.
User-agent: PerplexityBot
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: Perplexity-User
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# Exclusions come before the catch-all Allow so first-match parsers honour them.
User-agent: Google-Extended
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: Applebot-Extended
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: Amazonbot
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: cohere-ai
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# A named group replaces "User-agent: *" for that crawler (nothing is inherited),
# so the private paths are repeated here; Allow comes last for first-match parsers.
User-agent: Diffbot
Disallow: /store/admin/
Disallow: /store/checkout/
Disallow: /field-intel/
Allow: /

# ─── Crawlers explicitly blocked ─────────────────────────────────────────
# Bytespider scrapes aggressively for ByteDance LLM training without
# meaningful attribution back; opted out.

# "Disallow: /" prefix-matches every URL path, so this group blocks the whole site.
User-agent: Bytespider
Disallow: /

# CCBot (Common Crawl) — blocked because Common Crawl data is used to train
# many models without attribution to source. Reverse this if site policy changes.

# "Disallow: /" prefix-matches every URL path, so this group blocks the whole site.
User-agent: CCBot
Disallow: /