Skip to content

Commit

Permalink
Merge branch 'apify:master' into master
Browse files (browse the repository at this point in the history)
  • Loading branch information
daball authored Jun 6, 2024
2 parents c3c5e76 + 31083aa commit 47e8bb1
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 10 deletions.
9 changes: 9 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@
"rules": {
"@typescript-eslint/array-type": "error",
"@typescript-eslint/ban-ts-comment": 0,
"@typescript-eslint/ban-types": [
"error",
{
"types": {
"{}": false
},
"extendDefaults": true
}
],
"@typescript-eslint/consistent-type-imports": [
"error",
{
Expand Down
1 change: 0 additions & 1 deletion packages/core/src/crawlers/crawler_commons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import { KeyValueStore } from '../storages';

export interface RestrictedCrawlingContext<UserData extends Dictionary = Dictionary>
// we need `Record<string & {}, unknown>` here, otherwise `Omit<Context>` is resolved badly
// eslint-disable-next-line
extends Record<string & {}, unknown> {
/**
* The original {@apilink Request} object.
Expand Down
2 changes: 0 additions & 2 deletions packages/core/src/typedefs.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* eslint-disable @typescript-eslint/ban-types */

/**
 * Type of a constructor: any value that can be invoked with `new`, accepts
 * arbitrary arguments, and produces an instance of `T`.
 * `T` defaults to `unknown` when no type argument is supplied.
 * @ignore
 */
export type Constructor<T = unknown> = new (...args: any[]) => T;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { addTimeoutToPromise } from '@apify/timeout';
import { extractUrlsFromPage } from '@crawlee/browser';
import { extractUrlsFromPage, type RouterHandler } from '@crawlee/browser';
import type {
RestrictedCrawlingContext,
StatisticState,
Expand Down Expand Up @@ -98,7 +98,7 @@ export interface AdaptivePlaywrightCrawlerOptions
* If the function throws an exception, the crawler will try to re-crawl the
* request later, up to `option.maxRequestRetries` times.
*/
requestHandler: (crawlingContext: AdaptivePlaywrightCrawlerContext) => Awaitable<void>;
requestHandler?: (crawlingContext: AdaptivePlaywrightCrawlerContext) => Awaitable<void>;

/**
* Specifies the frequency of rendering type detection checks - 0.1 means roughly 10% of requests.
Expand Down Expand Up @@ -156,12 +156,20 @@ export interface AdaptivePlaywrightCrawlerOptions
* @experimental
*/
export class AdaptivePlaywrightCrawler extends PlaywrightCrawler {
private adaptiveRequestHandler: AdaptivePlaywrightCrawlerOptions['requestHandler'];
private adaptiveRequestHandler: AdaptivePlaywrightCrawlerOptions['requestHandler'] & {};
private renderingTypePredictor: NonNullable<AdaptivePlaywrightCrawlerOptions['renderingTypePredictor']>;
private resultChecker: NonNullable<AdaptivePlaywrightCrawlerOptions['resultChecker']>;
private resultComparator: NonNullable<AdaptivePlaywrightCrawlerOptions['resultComparator']>;
override readonly stats: AdaptivePlaywrightCrawlerStatistics;

/**
* Default {@apilink Router} instance that will be used if we don't specify any {@apilink AdaptivePlaywrightCrawlerOptions.requestHandler|`requestHandler`}.
* See {@apilink Router.addHandler|`router.addHandler()`} and {@apilink Router.addDefaultHandler|`router.addDefaultHandler()`}.
*/
// @ts-ignore
override readonly router: RouterHandler<AdaptivePlaywrightCrawlerContext> =
Router.create<AdaptivePlaywrightCrawlerContext>();

constructor(
{
requestHandler,
Expand All @@ -175,7 +183,7 @@ export class AdaptivePlaywrightCrawler extends PlaywrightCrawler {
override readonly config = Configuration.getGlobalConfig(),
) {
super(options, config);
this.adaptiveRequestHandler = requestHandler;
this.adaptiveRequestHandler = requestHandler ?? this.router;
this.renderingTypePredictor =
renderingTypePredictor ?? new RenderingTypePredictor({ detectionRatio: renderingTypeDetectionRatio });
this.resultChecker = resultChecker ?? (() => true);
Expand Down Expand Up @@ -322,7 +330,7 @@ export class AdaptivePlaywrightCrawler extends PlaywrightCrawler {
log: crawlingContext.log,
querySelector: async (selector, timeoutMs) => {
const locator = playwrightContext.page.locator(selector).first();
await locator.waitFor({ timeout: timeoutMs });
await locator.waitFor({ timeout: timeoutMs, state: 'attached' });
return (await playwrightContext.parseWithCheerio())(
selector,
) as Cheerio<Element>;
Expand Down
2 changes: 0 additions & 2 deletions packages/utils/src/internals/typedefs.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* eslint-disable @typescript-eslint/ban-types */

/** @ignore */
export function entries<T extends {}>(obj: T) {
return Object.entries(obj) as [keyof T, T[keyof T]][];
Expand Down

0 comments on commit 47e8bb1

Please sign in to comment.