From ccd9484ab8f84bfc4e611112e9c1c93e6753221d Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:30:46 +0100 Subject: [PATCH 01/15] Remove overly greedy entry "Not", which blocks every phone or tablet in the popular Redmi Note series of phones and tablets, made by Xiaomi: --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index c37924fc9..0a97b0ad9 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -317,7 +317,6 @@ NimbleCrawler Nimbostratus Ninja Nmap -Not Nuclei Nutch Octopus From 9539ad2489a0f57949f97293c37aba661392eb2c Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:32:42 +0100 Subject: [PATCH 02/15] Remove "PyCurl" entry, which is just a Python wrapper for cURL, and nothing inherently malicious --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 0a97b0ad9..b27c177ff 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -361,7 +361,6 @@ Psbot Pu_iN Pump PxBroker -PyCurl QueryN\ Metasearch Quick-Crawler RSSingBot From dfd2b7ae783e47cd91a4acf138d604a67636ce7c Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:34:22 +0100 Subject: [PATCH 03/15] Remove LWP entries - this is just Perl's normal request library --- _generator_lists/bad-user-agents.list | 3 --- 1 file changed, 3 deletions(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index b27c177ff..5b56b70bb 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -225,7 +225,6 @@ Keyword\ Density Kinza Kozmosbot LNSpiderguy -LWP::Simple Lanshanbot Larbin Leap @@ -589,8 +588,6 @@ ips-agent isitwp.com iubenda-radar linkdexbot -lwp-request -lwp-trivial 
magpie-crawler meanpathbot mediawords From c79f42d169bb89746873701f0fa355b0e3b78dd7 Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:36:02 +0100 Subject: [PATCH 04/15] Remove overly greedy entry "Leap", which blocks every app with "MobileApp" in its UA string --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 5b56b70bb..8f5a820e3 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -227,7 +227,6 @@ Kozmosbot LNSpiderguy Lanshanbot Larbin -Leap LeechFTP LeechGet LexiBot From 1ee8f926babdac90386ca2f636eaedc6d3ee297c Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:37:33 +0100 Subject: [PATCH 05/15] Remove overly greedy entry "Ninja", which blocks every request from Microsoft Teams. Their agent uses UA strings like this: MicrosoftNinja/1.0+Teams/1.0+(ExchangeServicesClient/0.0.0.0)+SkypeSpaces/1.0a$*+ This breaks MS Teams integration with MS Exchange, possibly more. 
--- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 8f5a820e3..3210da632 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -313,7 +313,6 @@ Niki-bot Nikto NimbleCrawler Nimbostratus -Ninja Nmap Nuclei Nutch From 954ea714ca8b8e65557f97c16ebf1344d45bbae4 Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:40:49 +0100 Subject: [PATCH 06/15] Remove overly greedy entry "oBot", which matches (among any other type of robot) "UptimeRobot", which is supposed to be limited - not blocked --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 3210da632..0d85d39a8 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -591,7 +591,6 @@ meanpathbot mediawords muhstik-scan netEstate\ NE\ Crawler -oBot page\ scorer pcBrowser plumanalytics From d55d6b2e794a88e0e49ed512b64da91df9e5d7f9 Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:44:42 +0100 Subject: [PATCH 07/15] Remove overly greedy entry WEBDAV, which can (and in many cases will) block legitimate WebDAV clients correctly identifying as such. WebDAV is a very widely used official HTTP standard, and is the foundation of popular solutions like Nextcloud and ownCloud. 
--- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 0d85d39a8..f6a331e34 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -489,7 +489,6 @@ Voil Voltron WASALive-Bot WBSearchBot -WEBDAV WISENutbot WPScan WWW-Collector-E From a53f776ea41cb8e12f1abc60a3d010d69ac05553 Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:47:23 +0100 Subject: [PATCH 08/15] Remove "Bullseye" entry. This is the codename for Debian 11 (the current stable release), and the version codename string is used as part of UA for several of its packages, such as Python's pip --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index f6a331e34..3c673e84b 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -62,7 +62,6 @@ Buck Buddy BuiltBotTough BuiltWith -Bullseye BunnySlippers BuzzSumo CATExplorador From f783035b6374d5570a1eeb5ab01445675adedf58 Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:50:29 +0100 Subject: [PATCH 09/15] Remove "Siteimprove" - this is a paid service for assessing your site, in terms of features such as accessibility. If you're receiving requests from them, it's likely because you asked (and paid) them to. 
--- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 3c673e84b..a8879bbae 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -413,7 +413,6 @@ SiteSnagger SiteSucker Site\ Sucker Sitebeam -Siteimprove Sitevigil SlySearch SmartDownload From 89810ab36448f6d0859aa6158e7b40a6a394cda3 Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Sun, 5 Feb 2023 23:55:54 +0100 Subject: [PATCH 10/15] Remove "archive.org_bot" - this is Archive.org, a non-profit working to archive digital content and create a library for current and future generations. They are good guys, and if one person is annoyed by their work not strictly adhering to robots.txt, let that person block it locally, rather than cause this level of collateral damage for a good cause. Closes #87 --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index a8879bbae..2b9ded318 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -555,7 +555,6 @@ ZoominfoBot ZumBot ZyBorg adscanner -archive.org_bot arquivo-web-crawler arquivo.pt autoemailspider From e8b82cf71877b82c9e616f232d7744ddb1b0a77a Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Mon, 6 Feb 2023 00:04:02 +0100 Subject: [PATCH 11/15] Remove "Netcraft" entry. Netcraft is an online intelligence company fighting cybercrime on a plethora of different fronts. When they do mine technical data, it's for a good cause, that benefits all of us and makes the Internet a safer place. Their good efforts should not be blocked. 
--- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 2b9ded318..13bfb5a13 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -303,7 +303,6 @@ NetMechanic NetSpider NetZIP Net\ Vampire -Netcraft Nettrack Netvibes NextGenSearchBot From 2b25b9bcabcfad5fb03669f03b007bdc64eff7fc Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Mon, 6 Feb 2023 00:06:34 +0100 Subject: [PATCH 12/15] Remove "Shodan" entry. Shodan is an extremely powerful tool for vulnerability scanning. It doesn't do harm, it merely discovers weaknesses in your setup, which makes it possible for you to fix them. Hiding from it doesn't make you more secure - on the contrary. --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 13bfb5a13..e409985f1 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -403,7 +403,6 @@ SentiBot SeoSiteCheckup SeobilityBot Seomoz -Shodan Siphon SiteCheckerBotCrawler SiteExplorer From 03d3b6b1326719ea6f74dd267dd04a7a6ad325ce Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Mon, 6 Feb 2023 00:08:54 +0100 Subject: [PATCH 13/15] Remove various Chinese entries said to be legit - actual search engine spiders, and embedded views in apps. 
Closes #133 --- _generator_lists/bad-user-agents.list | 6 ------ 1 file changed, 6 deletions(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index e409985f1..0dba3170c 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -1,5 +1,4 @@ 01h4x.com -360Spider 404checker 404enemy 80legs @@ -180,7 +179,6 @@ HTMLparser HTTP::Lite HTTrack Haansoft -HaosouSpider Harvest Havij Heritrix @@ -275,7 +273,6 @@ Meanpathbot Mediatoolkitbot MegaIndex.ru Metauri -MicroMessenger Microsoft\ Data\ Access Microsoft\ URL\ Control Minefield @@ -419,7 +416,6 @@ Snapbot Snoopy SocialRankIOBot Sociscraper -Sogou\ web\ spider Sosospider Sottopop SpaceBison @@ -563,7 +559,6 @@ clark-crawler coccocbot cognitiveseo com.plumanalytics -crawl.sogou.com crawler.feedback crawler4j dataforseo.com @@ -603,7 +598,6 @@ serpstatbot sexsearcher sitechecker.pro siteripz -sogouspider sp_auditbot spyfu sysscan From af68f5a8611cae2bbd2257cb0b84b79a26297649 Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Mon, 6 Feb 2023 00:13:54 +0100 Subject: [PATCH 14/15] Remove "Exabot" entry, this is said to be a good and respectful bot - --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 0dba3170c..30491f13d 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -133,7 +133,6 @@ Ecxi EirGrabber EroCrawler Evil -Exabot Express\ WebPictures ExtLinksBot Extractor From 89223984c00c30bb2e962e74428859aa1eaf838e Mon Sep 17 00:00:00 2001 From: yitzhaq Date: Mon, 6 Feb 2023 00:16:06 +0100 Subject: [PATCH 15/15] Remove "Mojeek" entry, this is said to be a good and respectful bot - . 
Closes #107 --- _generator_lists/bad-user-agents.list | 1 - 1 file changed, 1 deletion(-) diff --git a/_generator_lists/bad-user-agents.list b/_generator_lists/bad-user-agents.list index 30491f13d..2417dae4f 100755 --- a/_generator_lists/bad-user-agents.list +++ b/_generator_lists/bad-user-agents.list @@ -277,7 +277,6 @@ Microsoft\ URL\ Control Minefield Mister\ PiX Moblie Safari -Mojeek Mojolicious MolokaiBot Morfeus\ Fucking\ Scanner