# robots.txt for ExpurHause - Pet Care Services Website
#
# How this file is evaluated:
#   * A crawler obeys ONLY the single most specific User-agent group that
#     matches it. Rules are NOT inherited from the "*" group, so every group
#     declared for a named bot must repeat each Disallow rule it should honour.
#   * Each directive must be on its own line; "#" starts a comment that runs
#     to the end of the line.

# ---------------------------------------------------------------------------
# Default rules for all crawlers
# ---------------------------------------------------------------------------
User-agent: *

# Main content
Allow: /
Allow: /index.html
Allow: /blog.html
Allow: /article1.html
Allow: /article2.html
Allow: /article3.html
Allow: /article4.html
Allow: /thanks.html

# Legal pages
Allow: /privacy.html
Allow: /terms.html
Allow: /cookie-policy.html

# Static assets (CSS, JavaScript and images) needed to render the pages
Allow: /css/
Allow: /js/
Allow: /images/

# File types that should not be crawled.
# Path patterns must begin with "/"; "*" matches any sequence of characters
# and "$" anchors the match to the end of the URL.
Disallow: /*.log$
Disallow: /*.tmp$
Disallow: /*.backup$

# Test and development areas
Disallow: /test/
Disallow: /dev/
Disallow: /staging/

# Crawl delay in seconds (be respectful of server resources).
# Non-standard directive: honoured by some crawlers (e.g. Bingbot) and
# ignored by others (e.g. Googlebot).
Crawl-delay: 1

# NOTE: Googlebot, Bingbot and Slurp intentionally have no dedicated group.
# A group containing only "Allow: /" would replace the default group for that
# bot and silently lift every Disallow rule above. These crawlers are covered
# by the "User-agent: *" group.

# ---------------------------------------------------------------------------
# Throttle aggressive SEO crawlers (slowed down, not blocked)
# ---------------------------------------------------------------------------
# The Disallow rules are repeated here because this group replaces the
# default group for these bots.
User-agent: AhrefsBot
User-agent: MJ12bot
Crawl-delay: 10
Disallow: /*.log$
Disallow: /*.tmp$
Disallow: /*.backup$
Disallow: /test/
Disallow: /dev/
Disallow: /staging/

# ---------------------------------------------------------------------------
# Block AI training bots if desired (optional) - uncomment to enable
# ---------------------------------------------------------------------------
# User-agent: GPTBot
# Disallow: /
#
# User-agent: ChatGPT-User
# Disallow: /

# ---------------------------------------------------------------------------
# Sitemap (applies to all crawlers; not part of any User-agent group)
# ---------------------------------------------------------------------------
Sitemap: https://expurhause.media/sitemap.xml

# Note: robots.txt does not control caching. Caching of static assets must be
# configured at the server level.