From 923130b39db807eef1d3644c6f5f43f3582896c5 Mon Sep 17 00:00:00 2001
From: Martin Monperrus
Date: Sat, 18 May 2024 11:58:11 +0200
Subject: [PATCH] add usage harness in Python

---
 README.md   | 23 ++++++++++++++++-------
 __init__.py | 22 ++++++++++++++++++++++
 2 files changed, 38 insertions(+), 7 deletions(-)
 create mode 100644 __init__.py

diff --git a/README.md b/README.md
index db421e1..248742e 100644
--- a/README.md
+++ b/README.md
@@ -6,13 +6,15 @@
 This repository contains a list of HTTP user-agents used by robots, crawlers,
 
 * Go package: 
 * PyPi package: 
 
+Each `pattern` is a regular expression. It should work out-of-the-box with your favorite regex library.
+
 ## Install
 
 ### Direct download
 
 Download the [`crawler-user-agents.json` file](https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json) from this repository directly.
 
-### Npm / Yarn
+### JavaScript
 
 crawler-user-agents is deployed on npmjs.com: 
 
@@ -31,14 +33,21 @@
 const crawlers = require('crawler-user-agents');
 console.log(crawlers);
 ```
 
-## Usage
+### Python
 
-Each `pattern` is a regular expression. It should work out-of-the-box wih your favorite regex library:
+Install with `pip install crawler-user-agents`.
+
+Then:
+
+```python
+import crawleruseragents
+if crawleruseragents.is_crawler("googlebot/"):
+    ...  # do something
+```
+
+### Go
 
-* JavaScript: `if (RegExp(entry.pattern).test(req.headers['user-agent']) { ... }`
-* PHP: add a slash before and after the pattern: `if (preg_match('/'.$entry['pattern'].'/', $_SERVER['HTTP_USER_AGENT'])): ...`
-* Python: `if re.search(entry['pattern'], ua): ...`
-* Go: use [this package](https://pkg.go.dev/github.com/monperrus/crawler-user-agents),
+Use [this package](https://pkg.go.dev/github.com/monperrus/crawler-user-agents),
 it provides global variable `Crawlers` (it is synchronized with `crawler-user-agents.json`),
 functions `IsCrawler` and `MatchingCrawlers`.

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..6ffe061
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,22 @@
+import json
+import re
+from importlib import resources
+
+def load_json():
+    # load the bundled crawler-user-agents.json from within this package
+    return json.loads(resources.read_text(__package__, "crawler-user-agents.json"))
+
+DATA = load_json()
+
+def is_crawler(user_agent):
+    # test the user-agent string against each pattern, one at a time
+    for entry in DATA:
+        if re.search(entry["pattern"], user_agent, re.IGNORECASE):
+            return True
+    return False
+
+def is_crawler2(user_agent):
+    # same check, with all patterns combined into a single regex
+    regexp = re.compile("|".join(entry["pattern"] for entry in DATA), re.IGNORECASE)
+    return regexp.search(user_agent) is not None
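
As a quick check of the new harness, a minimal smoke-test sketch (assuming the package is installed via `pip install crawler-user-agents` and importable as `crawleruseragents`, as in the README section above; the expected values assume `Googlebot` appears in the pattern list):

```python
import crawleruseragents

# is_crawler tests each pattern separately; is_crawler2 folds all
# patterns into one combined regex. Both should agree on any input.
ua = "Googlebot/2.1 (+http://www.google.com/bot.html)"
print(crawleruseragents.is_crawler(ua))   # True: "Googlebot" is in the list
print(crawleruseragents.is_crawler2(ua))  # True: same result via the combined regex

# a plain browser user-agent should not match any crawler pattern
print(crawleruseragents.is_crawler("Mozilla/5.0 (Windows NT 10.0; Win64; x64)"))  # expected: False
```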