my nixos config
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

make the "bot detection" more awesome sauce: added more user-agents, all now kept in a nix list

chfour 3cee989b d67488ca

+49 -10
+49 -10
machines/fovps/services/caddy/default.nix
··· 3 3 let 4 4 websiteDest = "${config.services.caddy.dataDir}/website"; 5 5 websitePath = builtins.toString website.website.out; 6 + 7 + disallowedAgents = [ 8 + # https://git.madhouse-project.org/iocaine/nam-shub-of-enki/src/branch/main/org/module 9 + # https://momenticmarketing.com/blog/ai-search-crawlers-bots 10 + # https://darkvisitors.com/agents 11 + # ironically, https://www.amazon.com/robots.txt 12 + # and https://radar.cloudflare.com/en-us/bots/directory?category=AI_CRAWLER&kind=all 13 + # https://donotsta.re/objects/4ff43b3e-2a56-48f1-870b-48353cd90801 14 + # > DON’T LET YOUR DREAMS STAY DREAMS! 15 + "Google-Extended" "Google-CloudVertexBot" "Gemini-Deep-Research" 16 + "GoogleOther" "Google-NotebookLM" "GoogleAgent" 17 + "FacebookBot" "meta-externalagent" "meta-externalfetcher" 18 + "facebookexternalhit" "facebookcatalog" 19 + "anthropic" "Claude-Web" "Claude-SearchBot" "Claude-User" "ClaudeBot" 20 + "OpenAI" "GPTBot" "ChatGPT-User" "OAI-SearchBot" "ChatGPT Agent" 21 + "PerplexityBot" "Perplexity-User" 22 + "MistralAI-User" 23 + "amazon" "NovaAct" # "Amazonbot" "amazon-kendra" 24 + "Applebot-Extended" 25 + "DuckAssistBot" "Copilot" "BingBot" "LinkedInBot" 26 + "Bytespider" 27 + "PetalBot" "PanguBot" # these are from huawei 28 + "omgili" "webzio" # "Webzio-Extended" 29 + "Anchor Browser" "Awario" "LinerBot" "factset_spyderbot" "magpie-crawler" 30 + "CCBot" "YouBot" "Diffbot" "cohere-ai" "Novellum" "EchoboxBot" "WARDBot" 31 + "Sidetrade indexer bot" "TimpiBot" "semrush" "Scrapy" "Devin" 32 + "babbar.tech" "barkrowler" "BLEXBot" "DotBot" "ICC-Crawler" "Cotoyogi" 33 + "ahrefs" "DataForSeoBot" "ImagesiftBot" "EtaoSpider" "QualifiedBot" 34 + "MJ12Bot" "dataforseo-bot" "bigsur.ai" "Datenbank Crawler" "netEstate" 35 + "Firefox/72.0" "Arc/" 36 + "Kangaroo Bot" # fucking crazy lmao 37 + # "Nexus 5X Build/MMB29P" 38 + # apparently that also blocks normal googlebot. oops! 
39 + ]; 6 40 in { 7 41 services.caddy.enable = true; 8 - services.caddy.extraConfig = '' 42 + services.caddy.extraConfig = let 43 + agentsRegex = with pkgs; "(?i)" + lib.strings.escape [ "'" "\\" ] 44 + (builtins.concatStringsSep "|" ( 45 + builtins.map (lib.strings.escapeRegex) disallowedAgents)); 46 + in '' 47 + (bots) { 48 + @bots <<CEL 49 + header_regexp('User-Agent', '${agentsRegex}') 50 + || header({'x-firefox-ai': '1'}) 51 + CEL 52 + handle @bots { 53 + header X-Fuck-Off "PLEASE do!" 54 + redir https://nbg1-speed.hetzner.com/10GB.bin?BALLS permanent 55 + } 56 + } 9 57 (errors) { 10 58 handle_errors { 11 59 @custom_err file /{err.status_code}.html /err.html ··· 15 63 } 16 64 respond "{err.status_code} {err.status_text} 17 65 " # caddy why 18 - } 19 - } 20 - (bots) { 21 - # https://donotsta.re/objects/4ff43b3e-2a56-48f1-870b-48353cd90801 22 - # > DON’T LET YOUR DREAMS STAY DREAMS! 23 - @bots header_regexp User-Agent "GPTBot|ChatGPT-User|Google-Extended|CCBot|PerplexityBot|anthropic-ai|Claude-Web|ClaudeBot|Amazonbot|FacebookBot|Applebot-Extended|semrush|barkrowler|PetalBot|meta-externalagent|meta-externalfetcher|facebookexternalhit|facebookcatalog|Firefox/72\.0|babbar\.tech|BLEXBot|DotBot|ahrefs|DataForSeoBot|ImagesiftBot|Nexus 5X Build/MMB29P|Arc/|MJ12Bot|dataforseo-bot|MJ12bot" 24 - handle @bots { 25 - header X-Fuck-Off "PLEASE do!" 26 - redir https://nbg1-speed.hetzner.com/10GB.bin?BALLS permanent 27 66 } 28 67 } 29 68 '';