enh: listing source (#168)
cullenwatson authored Jul 16, 2024
1 parent 0988230 commit edffe18
Showing 6 changed files with 19 additions and 4 deletions.
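This commit adds a listing_type field to JobPost and a matching listing_type column to the scraped output. In the hunks below it is filled from Glassdoor's adOrderSponsorshipLevel and ZipRecruiter's buyer_type, while the Indeed GraphQL query now also requests the listing's source name. A minimal consumer-side sketch of the new column, assuming the scrape_jobs entry point and its pandas DataFrame return value; the search parameters are illustrative only:

    from jobspy import scrape_jobs

    # Illustrative parameters; any supported site works the same way.
    jobs = scrape_jobs(
        site_name=["glassdoor", "zip_recruiter"],
        search_term="software engineer",
        location="San Francisco, CA",
        results_wanted=20,
    )

    # The new column separates sponsored/promoted listings from organic ones.
    print(jobs["listing_type"].value_counts(dropna=False))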
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
-version = "1.1.57"
+version = "1.1.58"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/Bunsly/JobSpy"
1 change: 1 addition & 0 deletions src/jobspy/__init__.py
@@ -209,6 +209,7 @@ def convert_to_annual(job_data: dict):
"currency",
"is_remote",
"job_function",
+"listing_type",
"emails",
"description",
"company_url",
1 change: 1 addition & 0 deletions src/jobspy/jobs/__init__.py
@@ -242,6 +242,7 @@ class JobPost(BaseModel):
date_posted: date | None = None
emails: list[str] | None = None
is_remote: bool | None = None
+listing_type: str | None = None

# indeed specific
company_addresses: str | None = None
11 changes: 10 additions & 1 deletion src/jobspy/scrapers/glassdoor/__init__.py
@@ -189,7 +189,15 @@ def _process_job(self, job_data):
except:
description = None
company_url = f"{self.base_url}Overview/W-EI_IE{company_id}.htm"
-company_logo = job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
+company_logo = (
+    job_data["jobview"].get("overview", {}).get("squareLogoUrl", None)
+)
+listing_type = (
+    job_data["jobview"]
+    .get("header", {})
+    .get("adOrderSponsorshipLevel", "")
+    .lower()
+)
return JobPost(
id=str(job_id),
title=title,
@@ -203,6 +211,7 @@ def _process_job(self, job_data):
description=description,
emails=extract_emails_from_text(description) if description else None,
logo_photo_url=company_logo,
+listing_type=listing_type,
)

def _fetch_job_description(self, job_id):
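In the Glassdoor hunk above, the chained .get() calls with {} and "" defaults mean a payload without a header block, or without adOrderSponsorshipLevel, yields an empty listing_type instead of raising KeyError. A standalone sketch of that behaviour; the payloads and the "ORGANIC" value are made up for illustration, not taken from Glassdoor's API:

    def extract_listing_type(jobview: dict) -> str:
        # Mirrors the diff: every missing key falls through to a default.
        return jobview.get("header", {}).get("adOrderSponsorshipLevel", "").lower()

    print(extract_listing_type({"header": {"adOrderSponsorshipLevel": "ORGANIC"}}))  # -> "organic"
    print(extract_listing_type({"header": {}}))  # -> ""
    print(extract_listing_type({}))              # -> ""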
6 changes: 4 additions & 2 deletions src/jobspy/scrapers/indeed/__init__.py
@@ -176,7 +176,7 @@ def _build_filters(self):
keys.append("DSQF7")

if keys:
-keys_str = '", "'.join(keys)  # Prepare your keys string
+keys_str = '", "'.join(keys)
filters_str = f"""
filters: {{
composite: {{
@@ -353,7 +353,6 @@ def _get_compensation_interval(interval: str) -> CompensationInterval:
jobSearch(
{what}
{location}
-includeSponsoredResults: NONE
limit: 100
sort: DATE
{cursor}
@@ -365,6 +364,9 @@ def _get_compensation_interval(interval: str) -> CompensationInterval:
results {{
trackingKey
job {{
+source {{
+name
+}}
key
title
datePublished
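The Indeed hunks only touch the GraphQL query: the includeSponsoredResults: NONE filter is dropped and source { name } is now requested. The code that copies the source name onto the job is not part of the hunks shown here. A rough sketch of how such a mapping could look, assuming the response shape implied by the query; the helper name and the guard against a missing source object are assumptions, not code from this commit:

    def listing_type_from_indeed_job(job: dict) -> str | None:
        # Hypothetical helper: read the source name requested by the query,
        # tolerating a missing or null "source" object.
        source = job.get("source") or {}
        name = source.get("name")
        return name.lower() if name else None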
2 changes: 2 additions & 0 deletions src/jobspy/scrapers/ziprecruiter/__init__.py
@@ -135,6 +135,7 @@ def _process_job(self, job: dict) -> JobPost | None:
self.seen_urls.add(job_url)

description = job.get("job_description", "").strip()
+listing_type = job.get("buyer_type", "")
description = (
markdown_converter(description)
if self.scraper_input.description_format == DescriptionFormat.MARKDOWN
@@ -175,6 +176,7 @@ def _process_job(self, job: dict) -> JobPost | None:
description=description_full if description_full else description,
emails=extract_emails_from_text(description) if description else None,
job_url_direct=job_url_direct,
+listing_type=listing_type,
)

def _get_descr(self, job_url):