# Faytech North America Robots.txt
# Last Updated: 2025
# Purpose: Welcome AI/LLM crawlers for SEO/GSO/AEO while securing WordPress directories
#
# Maintenance notes:
#   - Every directive MUST be on its own line. If line breaks are lost, the
#     leading "#" turns the whole file into one comment and no rule applies.
#   - A crawler obeys only the most specific User-agent group that matches it
#     and ignores "*" when a named group exists. Welcomed crawlers are
#     therefore listed in the SAME group as "*" so that they inherit the
#     restrictions below instead of bypassing them.
#   - Within a group the longest matching path wins (Allow wins ties), so an
#     Allow that must override a Disallow has to be the longer pattern.
#   - No blank lines inside a group: some older parsers treat a blank line as
#     the end of the group.

# ============================================
# SITEMAP DECLARATION
# ============================================
Sitemap: https://www.faytech.us/sitemap_index.xml

# ============================================
# WELCOMED CRAWLERS + DEFAULT RULE (one shared group)
# ============================================
# AI/LLM and search crawlers are explicitly welcomed for AI search
# optimization; "*" covers every crawler not named elsewhere in this file.
#
# OpenAI (ChatGPT, SearchGPT)
User-agent: GPTBot
User-agent: ChatGPT-User
# Common Crawl (dataset used by many LLM providers)
User-agent: CCBot
# Anthropic (Claude)
User-agent: anthropic-ai
User-agent: Claude-Web
# Google (Gemini & traditional search)
User-agent: Googlebot
User-agent: Google-Extended
User-agent: Googlebot-Image
# Microsoft (Bing & Copilot)
User-agent: Bingbot
User-agent: MSNBot
# Perplexity AI
User-agent: PerplexityBot
# You.com
User-agent: YouBot
# Meta AI (Facebook/Instagram)
User-agent: facebookexternalhit
User-agent: meta-externalagent
# Apple (Siri & Spotlight)
User-agent: Applebot
User-agent: Applebot-Extended
# Amazon Alexa
User-agent: Amazonbot
# Neeva AI Search
User-agent: Neevabot
# Yandex
User-agent: YandexBot
# DuckDuckGo
User-agent: DuckDuckBot
# Default for all other crawlers
User-agent: *
#
# --- LLM content and static assets: explicit allow ---
# Make sure crawlers can access our specially crafted content.
Allow: /llms.txt
Allow: /wp-content/uploads/
Allow: /*.css$
Allow: /*.js$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.gif$
Allow: /*.svg$
Allow: /*.webp$
Allow: /*.woff$
Allow: /*.woff2$
Allow: /*.ttf$
Allow: /*.eot$
#
# The generic asset patterns above are shorter than the directory Disallow
# rules below and would lose to them, so rendering-critical CSS/JS inside the
# blocked directories gets longer, explicit overrides. Images and fonts in
# those directories remain blocked.
Allow: /wp-includes/*.css$
Allow: /wp-includes/*.js$
Allow: /wp-content/plugins/*.css$
Allow: /wp-content/plugins/*.js$
Allow: /wp-content/themes/*.css$
Allow: /wp-content/themes/*.js$
Allow: /wp-content/cache/*.css$
Allow: /wp-content/cache/*.js$
# Front-end features call admin-ajax.php; WordPress's own default robots.txt
# keeps it crawlable even though /wp-admin/ is blocked.
Allow: /wp-admin/admin-ajax.php
#
# --- WordPress security: blocked directories ---
# Core WordPress directories to block
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/cache/
Disallow: /wp-content/themes/
# WordPress files to block
Disallow: /wp-login.php
Disallow: /wp-signup.php
Disallow: /wp-register.php
Disallow: /xmlrpc.php
# Configuration and system files
# NOTE: robots.txt is advisory and publicly readable; these entries only keep
# well-behaved crawlers away and are not an access control.
Disallow: /wp-config.php
Disallow: /.htaccess
Disallow: /readme.html
Disallow: /license.txt
#
# --- Common security & performance blocks ---
# Search and filter pages (prevent duplicate content)
Disallow: /search/
Disallow: /filter/
Disallow: /*?s=
Disallow: /*?p=
Disallow: /*?attachment_id=
Disallow: /*?replytocom=
# Session and tracking parameters
Disallow: /*?session
Disallow: /*?sid=
Disallow: /*?utm_
Disallow: /*?ref=
# Archive and pagination issues
Disallow: /tag/
Disallow: /author/
Disallow: /feed/
Disallow: /rss/
Disallow: /embed/
Disallow: */trackback/
Disallow: */comment-page-
# Cart and checkout (if e-commerce)
Disallow: /cart/
Disallow: /checkout/
Disallow: /my-account/
# AJAX and API endpoints
Disallow: /wp-json/
Disallow: /ajax/
# Temporary and private directories
Disallow: /tmp/
Disallow: /temp/
Disallow: /private/
Disallow: /draft/
Disallow: /staging/
#
# Everything not matched above is crawlable. Kept last so that parsers which
# apply the first matching rule still honour the Disallow lines.
Allow: /

# ============================================
# BLOCK MALICIOUS & UNWANTED BOTS
# ============================================
# Known bad actors and resource-wasting crawlers. Each has its own group,
# which takes precedence over "*" for that crawler.

User-agent: PetalBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: SEOkicks-Robot
Disallow: /

User-agent: Nuclei
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: SiteAuditBot
Disallow: /