LinkedIn scraper fixes: (#159)
- Correct initial page offset calculation (see the sketch below)
- Separate page variable from request counter
- Fix job offset starting value
- Increment offset by number of jobs returned instead of expected value
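For context, a minimal sketch of the corrected starting-offset arithmetic (standalone Python with a hypothetical offset value; offset stands in for scraper_input.offset; this is not the scraper's own code). LinkedIn's search endpoint pages in steps of 25, so a requested offset should be rounded down to the nearest page boundary rather than having 25 added to the page count:

# Hypothetical offset; a sketch of the fix, not the scraper itself.
offset = 60

old_page = offset // 25 + 25   # 60 // 25 + 25 = 27, not a multiple of 25
new_page = offset // 25 * 25   # 60 // 25 * 25 = 50, the page boundary below 60

print(old_page, new_page)  # 27 50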
adamagassi authored May 28, 2024
1 parent 5cb7ffe commit 7f6271b
Showing 1 changed file with 6 additions and 4 deletions:

src/jobspy/scrapers/linkedin/__init__.py
@@ -72,15 +72,17 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
         job_list: list[JobPost] = []
         seen_urls = set()
         url_lock = Lock()
-        page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0
+        page = scraper_input.offset // 25 * 25 if scraper_input.offset else 0
+        request_count = 0
         seconds_old = (
             scraper_input.hours_old * 3600 if scraper_input.hours_old else None
         )
         continue_search = (
             lambda: len(job_list) < scraper_input.results_wanted and page < 1000
         )
         while continue_search():
-            logger.info(f"LinkedIn search page: {page // 25 + 1}")
+            request_count += 1
+            logger.info(f"LinkedIn search page: {request_count}")
             params = {
                 "keywords": scraper_input.search_term,
                 "location": scraper_input.location,
@@ -92,7 +94,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
                     else None
                 ),
                 "pageNum": 0,
-                "start": page + scraper_input.offset,
+                "start": page,
                 "f_AL": "true" if scraper_input.easy_apply else None,
                 "f_C": (
                     ",".join(map(str, scraper_input.linkedin_company_ids))
@@ -156,7 +158,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
 
             if continue_search():
                 time.sleep(random.uniform(self.delay, self.delay + self.band_delay))
-                page += self.jobs_per_page
+                page += len(job_list)
 
         job_list = job_list[: scraper_input.results_wanted]
         return JobResponse(jobs=job_list)
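Taken together, a small worked example of the request's start offset under the old and new logic (a hedged sketch with a hypothetical offset of 60; not the scraper's code). The old version both started page off the 25-step grid and re-added scraper_input.offset when building the start parameter, counting the offset twice; the new version sends the offset-aligned page directly and then advances it by the number of jobs actually collected rather than the expected page size:

# Hypothetical values; a sketch of the before/after "start" parameter.
offset = 60

# Old: page = 60 // 25 + 25 = 27, and the request sent start = page + offset
old_start = (offset // 25 + 25) + offset   # 87 -> offset counted twice

# New: page = 60 // 25 * 25 = 50, and the request sends start = page
new_start = offset // 25 * 25              # 50 -> offset counted once

print(old_start, new_start)  # 87 50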
