Skip to content

Commit

Permalink
fix: case_number >> case_id
Browse files Browse the repository at this point in the history
  • Loading branch information
newsroomdev committed Aug 19, 2024
1 parent 5a8c0a7 commit ac23bd2
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions clean/ca/humboldt_pd.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def _get_asset_links(self, pages, parent_page) -> list:
name = link.string
payload = {
"title": title,
"case_number": name,
"case_id": name,
"parent_page": str(parent_page),
"asset_url": f"{'https://humboldtgov.org'}{href}",
"name": name,
Expand All @@ -107,12 +107,12 @@ def _get_asset_links(self, pages, parent_page) -> list:
if soup.title and isinstance(soup.title.string, str)
else None
)
case_number = page["page_name"].split("/")[-1].split("_")[0]
case_id = page["page_name"].split("/")[-1].split("_")[0]
header = soup.find("h1")
name = header.get_text(strip=True) if header else None
payload = {
"title": title,
"case_number": case_number,
"case_id": case_id,
"parent_page": str(parent_page),
"download_page": str(page["page_name"]),
"asset_url": f"https://humboldtgov.nextrequest.com{link['href']}",
Expand All @@ -122,7 +122,7 @@ def _get_asset_links(self, pages, parent_page) -> list:
return metadata

def _make_download_path(self, asset):
folder_name = asset["case_number"]
folder_name = asset["case_id"]
name = asset["name"]
# If name has no extension, mark it as pdf, as it's a document format by metadata
if len(name.split(".")) == 1:
Expand Down

0 comments on commit ac23bd2

Please sign in to comment.