# WordPress robots.txt — allow major search + AI/LLM crawlers
#
# How this file is organised:
#   * A crawler obeys ONLY the one group that best matches its user-agent;
#     it does not merge that group with the "*" group. A named crawler that
#     had its own "Allow: /" group would therefore skip every Disallow below.
#     To keep the shared rules in force for everyone, the named crawlers are
#     listed in the SAME group as "*".
#   * Anything not matched by a Disallow is allowed by default, so no
#     separate "Allow: /" is needed to welcome a crawler.
#   * Inside the group, "#" lines are used as separators instead of blank
#     lines, because some older parsers treat a blank line as end-of-group.

########################################
# Crawlers covered by the shared rules
########################################
# Every crawler not named explicitly
User-agent: *
#
# ---- Popular SEARCH ENGINES ----
# Google
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
# Google (AI usage preference token)
User-agent: Google-Extended
# Microsoft Bing
User-agent: Bingbot
# DuckDuckGo
User-agent: DuckDuckBot
# Apple (Apple Search / Spotlight / Siri)
User-agent: Applebot
User-agent: Applebot-Image
# Yandex
User-agent: YandexBot
# Baidu
User-agent: Baiduspider
# Huawei Petal Search
User-agent: PetalBot
# Naver (Korea)
User-agent: Yeti
# Seznam (Czech)
User-agent: SeznamBot
# Sogou (China)
User-agent: Sogou web spider
#
# ---- AI / LLM CRAWLERS ----
# OpenAI (web crawler for models)
User-agent: GPTBot
# OpenAI (search indexing for ChatGPT)
User-agent: OAI-SearchBot
# Perplexity AI
User-agent: PerplexityBot
# Anthropic (Claude)
User-agent: ClaudeBot
# On-demand fetches from Claude when users open links
User-agent: Claude-User
# Common Crawl (training corpus used by many AI systems)
User-agent: CCBot
# Meta (Facebook/Instagram) AI/content fetcher
User-agent: meta-externalagent
#
########################################
# Shared rules (apply to every crawler listed above)
########################################
# Keep WP admin private, but allow AJAX.
# The Allow comes first so first-match parsers also let admin-ajax.php through;
# longest-match parsers reach the same result regardless of order.
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
#
# Make sure assets and images are crawlable
# (the uploads rule also covers Googlebot-Image, so it needs no group of its own)
Allow: /*.css$
Allow: /*.js$
Allow: /wp-content/uploads/
#
# Avoid thin/duplicate pages
Disallow: /?s=
Disallow: /search/
Disallow: /trackback/
Disallow: /comments/
Disallow: */feed/
Disallow: */rss/
Disallow: /*?replytocom=
Disallow: /*?utm_*
Disallow: /*?fbclid=

########################################
# Sitemaps (independent of any user-agent group)
########################################
Sitemap: https://marketcapitalize.com/sitemap_index.xml
Sitemap: https://marketcapitalize.com/news-sitemap.xml