linting and other minor fixes
zstumgoren committed Apr 10, 2024
1 parent abcca33 commit 3262148
Showing 5 changed files with 11 additions and 19 deletions.
clean/ca/san_diego_pd.py (15 changes: 8 additions & 7 deletions)
@@ -9,6 +9,7 @@
 
 
 def scrape(data_dir=utils.CLEAN_DATA_DIR, cache_dir=utils.CLEAN_CACHE_DIR, throttle=0):
+    """Scrape San Diego Police Department for SB16/SB1421/AB748 data."""
     cache = Cache(cache_dir)
     # This module
     mod = Path(__file__)
@@ -31,7 +32,7 @@ def scrape(data_dir=utils.CLEAN_DATA_DIR, cache_dir=utils.CLEAN_CACHE_DIR, throt
 def download_index_pages(
     base_url, cache, cache_suffix, throttle, page_count, current_page, index_pages=[]
 ):
-    """Download index pages for SB16/SB1421/AB748
+    """Download index pages for SB16/SB1421/AB748.
 
     Index pages link to child pages containing videos and
     other files related to use-of-force and disciplinary incidents.
@@ -68,8 +69,8 @@ def download_index_pages(
     return index_pages
 
 
-### LEGACY CODE BELOW ###
-def scrape_list_page(cache, top_level_urls, base_url, throttle):
+# LEGACY CODE BELOW #
+def _scrape_list_page(cache, top_level_urls, base_url, throttle):
     second_level_urls = {}
     for top_url in top_level_urls:
         page = requests.get(top_url)
@@ -78,21 +79,21 @@ def scrape_list_page(cache, top_level_urls, base_url, throttle):
         six_columns = soup.find_all("div", class_="six columns")
         for elem in six_columns:
             paragraph_with_link = elem.find("p")
-            if paragraph_with_link == None:
+            if paragraph_with_link is None:
                 continue
             else:
                 text = paragraph_with_link.text
                 elem_a = paragraph_with_link.find("a")
-                if elem_a == None:
+                if elem_a is None:
                     continue
                 else:
                     full_link = base_url + elem_a["href"]
                     second_level_urls[full_link] = text
-    download_case_files(base_url, second_level_urls)
+    _download_case_files(base_url, second_level_urls)
     return second_level_urls
 
 
-def download_case_files(base_url, second_level_urls):
+def _download_case_files(base_url, second_level_urls):
     all_case_content_links = []
     for url in second_level_urls.keys():
         page = requests.get(url)
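The == None to is None edits above follow flake8's E711 rule, and the leading underscore added to _scrape_list_page and _download_case_files marks the legacy helpers as module-private by convention. A minimal sketch of why an identity check is preferred for None; the AlwaysEqual class is a contrived illustration, not part of the scraper code:

class AlwaysEqual:
    def __eq__(self, other):
        return True  # a class can redefine ==, so equality with None is not trustworthy

obj = AlwaysEqual()
print(obj == None)  # True, even though obj is a real object (this is what E711 flags)
print(obj is None)  # False, identity cannot be overridden
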
clean/cli.py (4 changes: 2 additions & 2 deletions)
@@ -8,6 +8,7 @@
 
 @click.group()
 def cli():
+    """Command-line interface for downloading CLEAN files."""
     pass
 
 
@@ -17,9 +18,8 @@ def list_agencies():
     Agency slugs can then be used with the scrape subcommand
"""
scrapers = utils.get_all_scrapers()
for state, agency_slugs in utils.get_all_scrapers().items():
click.echo(f"{state.upper()}:")
click.echo(f"\n{state.upper()}:")
for slug in sorted(agency_slugs):
click.echo(f" - {state}_{slug}")
message = (
clean/utils.py (2 changes: 0 additions & 2 deletions)
@@ -1,7 +1,6 @@
 import csv
 import logging
 import os
-import typing
 from pathlib import Path
 from time import sleep
 
@@ -138,7 +137,6 @@ def get_all_scrapers():
     Returns: Dictionary of agency slugs grouped by state postal.
     """
     this_dir = Path(__file__).parent
-    # Filter out anything not in a state folder
     abbrevs = [state.abbr.lower() for state in us.states.STATES]
     # Get all folders in dir
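For context, get_all_scrapers() is documented above as returning agency slugs grouped by state postal code, which is the structure the list_agencies loop in clean/cli.py iterates. A minimal sketch of that shape, assuming a dictionary with only the ca/san_diego_pd entry visible in this commit:

scrapers = {"ca": ["san_diego_pd"]}
for state, agency_slugs in scrapers.items():
    print(f"\n{state.upper()}:")  # blank line before each state heading, as in the updated cli.py
    for slug in sorted(agency_slugs):
        print(f" - {state}_{slug}")  # prints " - ca_san_diego_pd"
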
setup.cfg (2 changes: 1 addition & 1 deletion)
@@ -2,4 +2,4 @@
 test=pytest
 
 [flake8]
-extend-ignore = D100,D104,E203,E501
+extend-ignore = B006,D100,D104,E203,E501
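B006 is flake8-bugbear's check for mutable default arguments, which the index_pages=[] default on download_index_pages above would otherwise trigger; adding it to extend-ignore suppresses that warning. A minimal sketch of the pitfall the rule normally guards against, using an illustrative function that is not part of the repo:

def collect(item, seen=[]):  # the default list is created once, when the function is defined
    seen.append(item)
    return seen

print(collect("a"))  # ['a']
print(collect("b"))  # ['a', 'b'] - the same list is reused across calls
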
setup.py (7 changes: 0 additions & 7 deletions)
@@ -1,16 +1,9 @@
 #!/usr/bin/env python
 """Configure the package for distribution."""
-import distutils.cmd
-import os
-from importlib import import_module
-from pathlib import Path
 
-import jinja2
-import us
 from setuptools import find_packages, setup
 
-import clean
 
 
 def read(file_name):
     """Read the provided file."""
