linting and other minor fixes
zstumgoren committed Apr 10, 2024
1 parent abcca33 commit 3262148
Showing 5 changed files with 11 additions and 19 deletions.
clean/ca/san_diego_pd.py (15 changes: 8 additions & 7 deletions)
@@ -9,6 +9,7 @@
 
 
 def scrape(data_dir=utils.CLEAN_DATA_DIR, cache_dir=utils.CLEAN_CACHE_DIR, throttle=0):
+    """Scrape San Diego Police Department for SB16/SB1421/AB748 data."""
     cache = Cache(cache_dir)
     # This module
     mod = Path(__file__)
@@ -31,7 +32,7 @@ def scrape(data_dir=utils.CLEAN_DATA_DIR, cache_dir=utils.CLEAN_CACHE_DIR, throt
 def download_index_pages(
     base_url, cache, cache_suffix, throttle, page_count, current_page, index_pages=[]
 ):
-    """Download index pages for SB16/SB1421/AB748
+    """Download index pages for SB16/SB1421/AB748.
 
     Index pages link to child pages containing videos and
     other files related to use-of-force and disciplinary incidents.
@@ -68,8 +69,8 @@ def download_index_pages(
     return index_pages
 
 
-### LEGACY CODE BELOW ###
-def scrape_list_page(cache, top_level_urls, base_url, throttle):
+# LEGACY CODE BELOW #
+def _scrape_list_page(cache, top_level_urls, base_url, throttle):
     second_level_urls = {}
     for top_url in top_level_urls:
         page = requests.get(top_url)
@@ -78,21 +79,21 @@ def scrape_list_page(cache, top_level_urls, base_url, throttle):
         six_columns = soup.find_all("div", class_="six columns")
         for elem in six_columns:
             paragraph_with_link = elem.find("p")
-            if paragraph_with_link == None:
+            if paragraph_with_link is None:
                 continue
             else:
                 text = paragraph_with_link.text
                 elem_a = paragraph_with_link.find("a")
-                if elem_a == None:
+                if elem_a is None:
                     continue
                 else:
                     full_link = base_url + elem_a["href"]
                     second_level_urls[full_link] = text
-    download_case_files(base_url, second_level_urls)
+    _download_case_files(base_url, second_level_urls)
     return second_level_urls
 
 
-def download_case_files(base_url, second_level_urls):
+def _download_case_files(base_url, second_level_urls):
     all_case_content_links = []
     for url in second_level_urls.keys():
         page = requests.get(url)
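The == None to is None edits above follow flake8's E711 rule, and the leading underscore added to _scrape_list_page and _download_case_files marks the legacy helpers as module-private by convention. A minimal sketch of why an identity check is preferred for None; the AlwaysEqual class is a contrived illustration, not part of the scraper code:

class AlwaysEqual:
    def __eq__(self, other):
        return True  # a class can redefine ==, so equality with None is not trustworthy

obj = AlwaysEqual()
print(obj == None)  # True, even though obj is a real object (this is what E711 flags)
print(obj is None)  # False, identity cannot be overridden
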
clean/cli.py (4 changes: 2 additions & 2 deletions)
@@ -8,6 +8,7 @@
 
 @click.group()
 def cli():
+    """Command-line interface for downloading CLEAN files."""
     pass
 
 
@@ -17,9 +18,8 @@ def list_agencies():
     Agency slugs can then be used with the scrape subcommand
"""
scrapers = utils.get_all_scrapers()
for state, agency_slugs in utils.get_all_scrapers().items():
click.echo(f"{state.upper()}:")
click.echo(f"\n{state.upper()}:")
for slug in sorted(agency_slugs):
click.echo(f" - {state}_{slug}")
message = (
clean/utils.py (2 changes: 0 additions & 2 deletions)
@@ -1,7 +1,6 @@
 import csv
 import logging
 import os
-import typing
 from pathlib import Path
 from time import sleep
 
@@ -138,7 +137,6 @@ def get_all_scrapers():
     Returns: Dictionary of agency slugs grouped by state postal.
     """
     this_dir = Path(__file__).parent
-    # Filter out anything not in a state folder
     abbrevs = [state.abbr.lower() for state in us.states.STATES]
     # Get all folders in dir
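For context, get_all_scrapers() is documented above as returning agency slugs grouped by state postal code, which is the structure the list_agencies loop in clean/cli.py iterates. A minimal sketch of that shape, assuming a dictionary with only the ca/san_diego_pd entry visible in this commit:

scrapers = {"ca": ["san_diego_pd"]}
for state, agency_slugs in scrapers.items():
    print(f"\n{state.upper()}:")  # blank line before each state heading, as in the updated cli.py
    for slug in sorted(agency_slugs):
        print(f" - {state}_{slug}")  # prints " - ca_san_diego_pd"
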
setup.cfg (2 changes: 1 addition & 1 deletion)
@@ -2,4 +2,4 @@
 test=pytest
 
 [flake8]
-extend-ignore = D100,D104,E203,E501
+extend-ignore = B006,D100,D104,E203,E501
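B006 is flake8-bugbear's check for mutable default arguments, which the index_pages=[] default on download_index_pages above would otherwise trigger; adding it to extend-ignore suppresses that warning. A minimal sketch of the pitfall the rule normally guards against, using an illustrative function that is not part of the repo:

def collect(item, seen=[]):  # the default list is created once, when the function is defined
    seen.append(item)
    return seen

print(collect("a"))  # ['a']
print(collect("b"))  # ['a', 'b'] - the same list is reused across calls
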
setup.py (7 changes: 0 additions & 7 deletions)
@@ -1,16 +1,9 @@
 #!/usr/bin/env python
 """Configure the package for distribution."""
-import distutils.cmd
-import os
-from importlib import import_module
-from pathlib import Path
 
-import jinja2
-import us
 from setuptools import find_packages, setup
 
-import clean
 
 
 def read(file_name):
     """Read the provided file."""
