# SPDX-FileCopyrightText: The itiquette Authors
# SPDX-License-Identifier: CC0-1.0
#
# Policy: allow normal search-engine indexing AND on-demand AI answer
# engines (so a user asking ChatGPT/Claude/Perplexity "how do I configure
# gommitlint?" can be answered from the live docs). Block bulk crawlers
# that scrape the site for AI model training.
#
# This is an honor-system signal — well-behaved bots respect it; others
# won't. See also /.well-known/ai.txt and the meta tags in each page's <head>.

# Default group: any crawler without a more specific group below may fetch
# everything.
User-agent: *
Allow: /

# ──────────────────────────────────────────────────────────────────────
# Blocked: AI training / dataset crawlers
# ──────────────────────────────────────────────────────────────────────

# OpenAI training crawler
User-agent: GPTBot
Disallow: /

# Anthropic training crawlers
User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: anthropic-ai
Disallow: /

# Google AI training (Gemini / Vertex). Does NOT affect Googlebot search indexing.
User-agent: Google-Extended
Disallow: /

# Apple AI training. Does NOT affect Applebot search indexing.
User-agent: Applebot-Extended
Disallow: /

# Meta AI training
User-agent: FacebookBot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

# Amazon AI training
User-agent: Amazonbot
Disallow: /

# ByteDance / TikTok training
User-agent: Bytespider
Disallow: /

# Common Crawl — feeds many training datasets
User-agent: CCBot
Disallow: /

# Perplexity index/training crawler (distinct from Perplexity-User on-demand)
User-agent: PerplexityBot
Disallow: /

# Other training / dataset crawlers
# NOTE(review): "Kangaroo Bot", "Sidetrade indexer bot" and "Velen Crawler"
# contain spaces; RFC 9309 product tokens allow only letters, "-" and "_",
# so some parsers may match on the first word only — confirm against each
# crawler's own documentation.
User-agent: YouBot
Disallow: /

User-agent: PhindBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: cohere-training-data-crawler
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: omgili
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: AI2Bot
Disallow: /

User-agent: AI2Bot-Dolma
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

User-agent: PanguBot
Disallow: /

User-agent: Sidetrade indexer bot
Disallow: /

User-agent: Velen Crawler
Disallow: /

User-agent: webzio-extended
Disallow: /

# ──────────────────────────────────────────────────────────────────────
# Allowed: on-demand AI answer engines (user-initiated single fetch)
# Listed explicitly so the policy is visible — the default
# "User-agent: * / Allow: /" group would already allow them.
# ──────────────────────────────────────────────────────────────────────

# OpenAI ChatGPT on-demand browse + search-result fetch
User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

# Anthropic on-demand fetch + search-result fetch
User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# Perplexity on-demand answer fetch
User-agent: Perplexity-User
Allow: /

# Meta on-demand link/preview fetcher
User-agent: Meta-ExternalFetcher
Allow: /

# Mistral on-demand fetch
User-agent: MistralAI-User
Allow: /

# DuckDuckGo Assist on-demand fetch
User-agent: DuckAssistBot
Allow: /

# Sitemap location; this directive is independent of any user-agent group.
Sitemap: https://docs.itiquette.org/sitemap.xml