Skip to content

Commit

Permalink
fix: fix broken/missing URL links (#367)
Browse files (browse the repository at this point in the history)
  • Loading branch information
blaine-arcjet committed Sep 5, 2024
1 parent f7000aa commit 4683176
Showing 1 changed file with 21 additions and 11 deletions.
32 changes: 21 additions & 11 deletions crawler-user-agents.json
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@
},
{
"pattern": "Adidxbot",
- "url": "http://onlinehelp.microsoft.com/en-us/bing/hh204496.aspx",
+ "url": "https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0",
"instances": []
},
{
Expand Down Expand Up @@ -848,9 +848,12 @@
]
},
{
- "pattern": "sistrix crawler",
+ "pattern": "(sistrix|SISTRIX) [cC]rawler",
"addition_date": "2011/08/02",
- "instances": []
+ "url": "https://www.sistrix.com/tutorials/crawling-errors-in-the-optimizer/",
+ "instances": [
+ "Mozilla/5.0 (compatible; SISTRIX Crawler; http://crawler.sistrix.net/)"
+ ]
},
{
"pattern": "Ahrefs(Bot|SiteAudit)",
Expand Down Expand Up @@ -1080,6 +1083,7 @@
{
"pattern": "lssbot",
"addition_date": "2012/05/15",
+ "url": "https://www.lssbot.com/",
"instances": []
},
{
Expand Down Expand Up @@ -1178,6 +1182,7 @@
{
"pattern": "backlinkcrawler",
"addition_date": "2013/01/04",
+ "url": "http://www.backlinktest.com/crawler.html",
"instances": []
},
{
Expand Down Expand Up @@ -2274,6 +2279,7 @@
{
"pattern": "LinkArchiver",
"addition_date": "2017/09/24",
+ "url": "https://github.com/thisisparker/linkarchiver",
"instances": [
"@LinkArchiver twitter bot"
]
Expand Down Expand Up @@ -2306,6 +2312,7 @@
{
"pattern": "dcrawl",
"addition_date": "2017/09/22",
+ "url": "https://github.com/kgretzky/dcrawl",
"instances": [
"dcrawl/1.0"
]
Expand Down Expand Up @@ -2454,6 +2461,7 @@
{
"pattern": "AHC\\/",
"addition_date": "2017/11/02",
+ "url": "https://github.com/AsyncHttpClient/async-http-client",
"instances": [
"AHC/2.0"
]
Expand Down Expand Up @@ -2525,7 +2533,7 @@
{
"pattern": "Traackr\\.com",
"addition_date": "2017/11/02",
- "url": "Traackr.com",
+ "url": "https://www.traackr.com/",
"instances": [
"Traackr.com"
]
Expand Down Expand Up @@ -3146,7 +3154,7 @@
"instances": [
"Mozilla/5.0 zgrab/0.x"
],
- "url": "https://zmap.io/"
+ "url": "https://github.com/zmap/zgrab2"
},
{
"pattern": "PR-CY\\.RU",
Expand Down Expand Up @@ -3270,6 +3278,7 @@
{
"pattern": "VelenPublicWebCrawler",
"addition_date": "2018/10/09",
+ "url": "https://velen.io/",
"instances": [
"VelenPublicWebCrawler (velen.io)"
]
Expand Down Expand Up @@ -3932,7 +3941,7 @@
"instances": [
"SentiBot www.sentibot.eu (compatible with Googlebot)"
],
- "url": "https://www.sentibot.eu"
+ "url": "https://sites.google.com/senti1.com/sentibot-eu/home"
},
{
"pattern": "Domains Project\\/",
Expand Down Expand Up @@ -4018,7 +4027,7 @@
"instances": [
"rssbot/1.4.3 (+https://t.me/RustRssBot)"
],
- "url": "https://t.me/RustRssBot"
+ "url": "https://github.com/iovxw/rssbot"
},
{
"pattern": "startmebot\\/",
Expand Down Expand Up @@ -4082,7 +4091,7 @@
"Mozilla/5.0 (compatible; RidderBot/1.0; [email protected])",
"Mozilla/5.0 (compatible; RidderBot/1.0; [email protected]) (iPhone; CPU iPhone OS 8_4_1 like Mac OS X) Mobile/12H321"
],
- "url": "http://brandonmedia.net"
+ "url": "https://ridder.co/"
},
{
"pattern": "Taboolabot",
Expand Down Expand Up @@ -4206,8 +4215,7 @@
"addition_date": "2022/04/26",
"instances": [
"Mozilla/5.0 (compatible; Go-http-client/1.1; [email protected])"
- ],
- "url": "unknown"
+ ]
},
{
"pattern": "Viber",
Expand All @@ -4220,13 +4228,15 @@
{
"pattern": "e\\.ventures Investment Crawler",
"addition_date": "2021/06/05",
+ "url": "https://www.eventures.vc/",
"instances": [
"e.ventures Investment Crawler (eventures.vc)"
]
},
{
"pattern": "evc-batch",
"addition_date": "2021/06/07",
+ "url": "https://www.eventures.vc/",
"instances": [
"Mozilla/5.0 (compatible; evc-batch/2.0)"
]
Expand Down Expand Up @@ -4837,7 +4847,7 @@
"instances": [
"Mozilla/5.0 (compatible; ImagesiftBot; +imagesift.com)"
],
- "url": "https://imagesift.com"
+ "url": "https://imagesift.com/about"
},
{
"pattern": "Expanse",
Expand Down

0 comments on commit 4683176

Please sign in to comment.