my nixos config
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

move the disallowed user-agents list to a separate file

chfour d4efe74a 5c837e05

+81 -80
+80
machines/_common/disallowed-user-agents.nix
··· 1 + [ 2 + # https://git.madhouse-project.org/iocaine/nam-shub-of-enki/src/branch/main/org/module 3 + # https://momenticmarketing.com/blog/ai-search-crawlers-bots 4 + # https://darkvisitors.com/agents 5 + # ironically, https://www.amazon.com/robots.txt 6 + # and https://radar.cloudflare.com/en-us/bots/directory?category=AI_CRAWLER&kind=all 7 + # https://donotsta.re/objects/4ff43b3e-2a56-48f1-870b-48353cd90801 8 + # > DON’T LET YOUR DREAMS STAY DREAMS! 9 + 10 + # "Nexus 5X Build/MMB29P" 11 + # apparently that also blocks normal googlebot. oops! 12 + "Google-Extended" "Google-CloudVertexBot" "Gemini-Deep-Research" 13 + "GoogleOther" "Google-NotebookLM" "GoogleAgent" 14 + "FacebookBot" "meta-externalagent" "meta-externalfetcher" 15 + "facebookexternalhit" "facebookcatalog" 16 + "anthropic" "Claude-Web" "Claude-SearchBot" "Claude-User" "ClaudeBot" 17 + "OpenAI" "GPTBot" "ChatGPT-User" "OAI-SearchBot" "ChatGPT Agent" 18 + "PerplexityBot" "Perplexity-User" 19 + "MistralAI-User" 20 + "amazon" "NovaAct" # "Amazonbot" "amazon-kendra" 21 + "Applebot-Extended" 22 + "DuckAssistBot" "Copilot" "BingBot" "LinkedInBot" 23 + "Bytespider" 24 + "PetalBot" "PanguBot" # these are from huawei 25 + "omgili" "webzio" # "Webzio-Extended" 26 + "Anchor Browser" "Awario" "LinerBot" "factset_spyderbot" "magpie-crawler" 27 + "CCBot" "YouBot" "Diffbot" "cohere-ai" "Novellum" "EchoboxBot" "WARDBot" 28 + "Sidetrade indexer bot" "TimpiBot" "semrush" "Scrapy" "Devin" 29 + "babbar.tech" "barkrowler" "BLEXBot" "DotBot" "ICC-Crawler" "Cotoyogi" 30 + "ahrefs" "DataForSeoBot" "ImagesiftBot" "EtaoSpider" "QualifiedBot" 31 + "MJ12Bot" "dataforseo-bot" "bigsur.ai" "Datenbank Crawler" "netEstate" 32 + "LivelapBot" "SummalyBot" 33 + "Firefox/72.0" "Arc/" 34 + "Kangaroo Bot" # fucking crazy lmao 35 + "TurnitinBot" # >This robot collects content from the Internet for the sole purpose of helping educational institutions prevent plagiarism. 36 + # ,--------------------------------------. 
37 + # / Expanse, a Palo Alto Networks company, \ 38 + # | searches across the global IPv4 space | 39 + # | multiple times per day to identify | 40 + # | customers' presences on the Internet.| 41 + # | If you would like to be excluded from our| 42 + # | scans, please send IP addresses/domains | 43 + # \ to: scaninfo@paloaltonetworks.com / 44 + # `---__ ________-------' 45 + # -- _------------- 46 + # \/ 47 + # :nerd: 48 + "Palo Alto Networks" 49 + 50 + "RepoLookoutBot" "Censys" 51 + "SearchExpress" 52 + "leakix" "l9scan" 53 + "scanner.ducks.party" # youre a fucking bozo 54 + "Netcraft" 55 + # epic fail 56 + # Mozilla/5.0 \(Windows NT 10.0\; Win64\; x64\) AppleWebKit/537.36 \(KHTML, like Gecko\) Chrome/100.0.4896.60 Safari/537.36 57 + "\\(" "\\)" "\\;" 58 + # booo booo get better useragents 59 + "SonyEricsson" "Symbian" "J2ME" 60 + "Blackberry" "PlayBook" "RIM Tablet OS" 61 + # MOT-V9mm/00.62 UP.Browser/6.2.3.4.c.1.123 (GUI) MMP/2.0 62 + "MOT-" # maybe? 63 + "Windows ME" "Windows XP" "Windows CE" "Win 9x" 64 + "IRIX" 65 + # FAST-WebCrawler/3.8 (crawler at trd dot overture dot com; http://www.alltheweb.com/help/webmaster/crawler) 66 + # dude i cant even go to your website 67 + # this might not even exist anymore 68 + "FAST-WebCrawler" 69 + "PPC Mac OS X" 70 + "OS/2" # really bro 71 + # "Mozilla/2.0 (compatible; Ask Jeeves/Teoma)" 72 + # are you for fucking real 73 + "Ask Jeeves" 74 + "Opera" # this is old opera, new opera is "OPR/version" 75 + # ELinks/0.9.3 (textmode; Linux 2.6.9-kanotix-8 i686; 127x41) 76 + # where the hell are you getting these useragents from, gnu.org?? 77 + "Linux 2" 78 + "Knoppix" # nope lmao 79 + "Konqueror" 80 + ]
+1 -80
machines/fovps/services/caddy/default.nix
··· 1 1 { pkgs, config, ... }: 2 2 3 3 let 4 - disallowedAgents = [ 5 - # https://git.madhouse-project.org/iocaine/nam-shub-of-enki/src/branch/main/org/module 6 - # https://momenticmarketing.com/blog/ai-search-crawlers-bots 7 - # https://darkvisitors.com/agents 8 - # ironically, https://www.amazon.com/robots.txt 9 - # and https://radar.cloudflare.com/en-us/bots/directory?category=AI_CRAWLER&kind=all 10 - # https://donotsta.re/objects/4ff43b3e-2a56-48f1-870b-48353cd90801 11 - # > DON’T LET YOUR DREAMS STAY DREAMS! 12 - 13 - # "Nexus 5X Build/MMB29P" 14 - # apparently that also blocks normal googlebot. oops! 15 - "Google-Extended" "Google-CloudVertexBot" "Gemini-Deep-Research" 16 - "GoogleOther" "Google-NotebookLM" "GoogleAgent" 17 - "FacebookBot" "meta-externalagent" "meta-externalfetcher" 18 - "facebookexternalhit" "facebookcatalog" 19 - "anthropic" "Claude-Web" "Claude-SearchBot" "Claude-User" "ClaudeBot" 20 - "OpenAI" "GPTBot" "ChatGPT-User" "OAI-SearchBot" "ChatGPT Agent" 21 - "PerplexityBot" "Perplexity-User" 22 - "MistralAI-User" 23 - "amazon" "NovaAct" # "Amazonbot" "amazon-kendra" 24 - "Applebot-Extended" 25 - "DuckAssistBot" "Copilot" "BingBot" "LinkedInBot" 26 - "Bytespider" 27 - "PetalBot" "PanguBot" # these are from huawei 28 - "omgili" "webzio" # "Webzio-Extended" 29 - "Anchor Browser" "Awario" "LinerBot" "factset_spyderbot" "magpie-crawler" 30 - "CCBot" "YouBot" "Diffbot" "cohere-ai" "Novellum" "EchoboxBot" "WARDBot" 31 - "Sidetrade indexer bot" "TimpiBot" "semrush" "Scrapy" "Devin" 32 - "babbar.tech" "barkrowler" "BLEXBot" "DotBot" "ICC-Crawler" "Cotoyogi" 33 - "ahrefs" "DataForSeoBot" "ImagesiftBot" "EtaoSpider" "QualifiedBot" 34 - "MJ12Bot" "dataforseo-bot" "bigsur.ai" "Datenbank Crawler" "netEstate" 35 - "LivelapBot" "SummalyBot" 36 - "Firefox/72.0" "Arc/" 37 - "Kangaroo Bot" # fucking crazy lmao 38 - "TurnitinBot" # >This robot collects content from the Internet for the sole purpose of helping educational institutions prevent plagiarism. 
39 - # ,--------------------------------------. 40 - # / Expanse, a Palo Alto Networks company, \ 41 - # | searches across the global IPv4 space | 42 - # | multiple times per day to identify | 43 - # | customers' presences on the Internet.| 44 - # | If you would like to be excluded from our| 45 - # | scans, please send IP addresses/domains | 46 - # \ to: scaninfo@paloaltonetworks.com / 47 - # `---__ ________-------' 48 - # -- _------------- 49 - # \/ 50 - # :nerd: 51 - "Palo Alto Networks" 52 - 53 - "RepoLookoutBot" "Censys" 54 - "SearchExpress" 55 - "leakix" "l9scan" 56 - "scanner.ducks.party" # youre a fucking bozo 57 - "Netcraft" 58 - # epic fail 59 - # Mozilla/5.0 \(Windows NT 10.0\; Win64\; x64\) AppleWebKit/537.36 \(KHTML, like Gecko\) Chrome/100.0.4896.60 Safari/537.36 60 - "\\(" "\\)" "\\;" 61 - # booo booo get better useragents 62 - "SonyEricsson" "Symbian" "J2ME" 63 - "Blackberry" "PlayBook" "RIM Tablet OS" 64 - # MOT-V9mm/00.62 UP.Browser/6.2.3.4.c.1.123 (GUI) MMP/2.0 65 - "MOT-" # maybe? 66 - "Windows ME" "Windows XP" "Windows CE" "Win 9x" 67 - "IRIX" 68 - # FAST-WebCrawler/3.8 (crawler at trd dot overture dot com; http://www.alltheweb.com/help/webmaster/crawler) 69 - # dude i cant even go to your website 70 - # this might not even exist anymore 71 - "FAST-WebCrawler" 72 - "PPC Mac OS X" 73 - "OS/2" # really bro 74 - # "Mozilla/2.0 (compatible; Ask Jeeves/Teoma)" 75 - # are you for fucking real 76 - "Ask Jeeves" 77 - "Opera" # this is old opera, new opera is "OPR/version" 78 - # ELinks/0.9.3 (textmode; Linux 2.6.9-kanotix-8 i686; 127x41) 79 - # where the hell are you getting these useragents from, gnu.org?? 80 - "Linux 2" 81 - "Knoppix" # nope lmao 82 - "Konqueror" 83 - ]; 4 + disallowedAgents = import ../../../_common/disallowed-user-agents.nix; 84 5 in { 85 6 imports = [ 86 7 ./website.nix