Skip to content

Commit

Permalink
Use 'mypy' and 'ruff' to do extra Python linting
Browse files: browse the repository at this point in the history
Also, format the Python code to the 'ruff format' standard.
  • Loading branch information
jribbens committed Sep 5, 2024
1 parent 56f8195 commit d1c3094
Show file tree
Hide file tree
Showing 10 changed files with 266 additions and 175 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci-validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ jobs:
- run: node format.js --check
- run: pip3 install -e .[dev]
- run: py.test -vv
- run: mypy --strict *.py crawleruseragents/*.py
- run: ruff check
- run: ruff format --check
- run: python3 validate.py
- run: php validate.php
- run: go test
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
.cache
__pycache__
/vendor/
/build/
/dist/
/env/
/vendor/
*.egg-info/
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include *.json
LICENSE
README.md
14 changes: 12 additions & 2 deletions __init__.py → crawleruseragents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
import re
import json
from pathlib import Path
from typing import Required, TypedDict


def load_json():
class UserAgent(TypedDict, total=False):
addition_date: str
depends_on: list[str]
description: str
instances: Required[list[str]]
pattern: Required[str]
url: str


def load_json() -> list[UserAgent]:
cwd = Path(__file__).parent
user_agents_file_path = cwd / "crawler-user-agents.json"
with user_agents_file_path.open() as patterns_file:
return json.load(patterns_file)
return json.load(patterns_file) # type: ignore


CRAWLER_USER_AGENTS_DATA = load_json()
Expand Down
1 change: 1 addition & 0 deletions crawleruseragents/crawler-user-agents.json
Empty file added crawleruseragents/py.typed
Empty file.
14 changes: 10 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "crawler-user-agents"
version = "0.1"
Expand All @@ -14,18 +18,20 @@ dev = [
"iniconfig==2.0.0",
"jsonschema==4.22.0",
"jsonschema-specifications==2023.12.1",
"mypy==1.11.2",
"mypy-extensions==1.0.0",
"packaging==24.0",
"pluggy==1.5.0",
"pytest==8.2.0",
"referencing==0.35.0",
"rpds-py==0.18.0",
"ruff==0.6.4",
"types-jsonschema==4.23.0.20240813",
"typing-extensions==4.12.2",
]

[project.urls]
Homepage = "https://github.com/monperrus/crawler-user-agents"

[tool.setuptools]
package-dir = {"crawleruseragents" = "."}

[tool.setuptools.package-data]
"*" = ["*.json"]
packages = ["crawleruseragents"]
13 changes: 7 additions & 6 deletions test_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,36 @@
$ pytest test_harness.py
"""

from crawleruseragents import is_crawler, matching_crawlers


def test_match():
def test_match() -> None:
assert is_crawler("test Googlebot/2.0 test") is True


def test_nomatch():
def test_nomatch() -> None:
assert is_crawler("!!!!!!!!!!!!") is False


def test_case():
def test_case() -> None:
assert is_crawler("test googlebot/2.0 test") is False


def test_matching_crawlers_match():
def test_matching_crawlers_match() -> None:
result = matching_crawlers("test Googlebot/2.0 test")
assert isinstance(result, list)
assert len(result) > 0
assert all(isinstance(val, int) for val in result)


def test_matching_crawlers_nomatch():
def test_matching_crawlers_nomatch() -> None:
result = matching_crawlers("!!!!!!!!!!!!")
assert isinstance(result, list)
assert len(result) == 0


def test_matching_crawlers_case():
def test_matching_crawlers_case() -> None:
result = matching_crawlers("test googlebot/2.0 test")
assert isinstance(result, list)
assert len(result) == 0
Loading

0 comments on commit d1c3094

Please sign in to comment.