Skip to content

Commit

Permalink
Add missing docblocks
Browse files Browse the repository at this point in the history
  • Loading branch information
janbuchar committed Oct 4, 2024
1 parent e97084f commit 7f22ad5
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 25 deletions.
4 changes: 4 additions & 0 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,10 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
*/
statisticsOptions?: StatisticsOptions;

/**
* HTTP client implementation for the `sendRequest` context helper and for plain HTTP crawling.
* Defaults to a new instance of {@apilink GotScrapingHttpClient}.
*/
httpClient?: BaseHttpClient;
}

Expand Down
6 changes: 6 additions & 0 deletions packages/basic-crawler/src/internals/send-request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@ import {
import type { Method, GotResponse } from 'got-scraping';

/**
* Prepares a function to be used as the `sendRequest` context helper.
*
* @internal
* @param httpClient The HTTP client that will perform the requests.
* @param originRequest The crawling request being processed.
* @param session The user session associated with the current request.
* @param getProxyUrl A function that will return the proxy URL that should be used for handling the request.
*/
export function createSendRequest(
httpClient: BaseHttpClient,
Expand Down
63 changes: 38 additions & 25 deletions packages/core/src/http_clients/base_http_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ type Method =
| 'options'
| 'trace';

/**
* Maps permitted values of the `responseType` option on {@apilink HttpRequest} to the types that they produce.
*/
export interface ResponseTypes {
'json': unknown;
'text': string;
Expand Down Expand Up @@ -69,31 +72,9 @@ interface PromiseCookieJar {

type SimpleHeaders = Record<string, string | string[] | undefined>;

// Omitted (https://github.com/sindresorhus/got/blob/main/documentation/2-options.md):
// - decompress,
// - resolveBodyOnly,
// - allowGetBody,
// - dnsLookup,
// - dnsCache,
// - dnsLookupIpVersion,
// - retry,
// - hooks,
// - parseJson,
// - stringifyJson,
// - request,
// - cache,
// - cacheOptions,
// - http2
// - https
// - agent
// - localAddress
// - createConnection
// - pagination
// - setHost
// - maxHeaderSize
// - methodRewriting
// - enableUnixSockets
// - context
/**
* HTTP Request as accepted by {@apilink BaseHttpClient} methods.
*/
export interface HttpRequest<TResponseType extends keyof ResponseTypes = 'text'> {
[k: string]: unknown; // TODO BC with got - remove in 4.0

Expand Down Expand Up @@ -124,17 +105,28 @@ export interface HttpRequest<TResponseType extends keyof ResponseTypes = 'text'>
sessionToken?: object;
}

/**
* Additional options for HTTP requests that need to be handled separately before passing to {@apilink BaseHttpClient}.
*
* Extends {@apilink HttpRequest} with convenience fields; {@apilink processHttpRequestOptions} converts
* an object of this shape to a plain {@apilink HttpRequest}.
*
* @typeParam TResponseType Key of {@apilink ResponseTypes} selecting the response body type; defaults to `'text'`.
*/
export interface HttpRequestOptions<TResponseType extends keyof ResponseTypes = 'text'>
extends HttpRequest<TResponseType> {
/** Search (query string) parameters to be appended to the request URL */
searchParams?: SearchParams;

/** A form to be sent in the HTTP request body (URL encoding will be used) */
form?: Record<string, string>;
/** Arbitrary object to be JSON-serialized and sent as the HTTP request body */
json?: unknown;

/** Basic HTTP Auth username */
username?: string;
/** Basic HTTP Auth password */
password?: string;
}

/**
* HTTP response data, without a body, as returned by {@apilink BaseHttpClient} methods.
*/
export interface BaseHttpResponseData {
redirectUrls: URL[];
url: string;
Expand All @@ -154,32 +146,53 @@ interface HttpResponseWithoutBody<TResponseType extends keyof ResponseTypes = ke
request: HttpRequest<TResponseType>;
}

/**
* HTTP response data as returned by the {@apilink BaseHttpClient.sendRequest} method.
*
* @typeParam TResponseType Key of {@apilink ResponseTypes} that determines the type of `body`.
*/
export interface HttpResponse<TResponseType extends keyof ResponseTypes = keyof ResponseTypes>
extends HttpResponseWithoutBody<TResponseType> {
[k: string]: any; // TODO BC with got - remove in 4.0

/** The response body; its type is looked up in {@apilink ResponseTypes} using `TResponseType`. */
body: ResponseTypes[TResponseType];
}

/**
* HTTP response data as returned by the {@apilink BaseHttpClient.stream} method.
*/
export interface StreamingHttpResponse extends HttpResponseWithoutBody {
/** Readable stream from which the response body may be read. */
stream: Readable;
// NOTE(review): the `Progress` type is declared outside this view; presumably these two
// fields report transfer progress for the response and the request body - confirm.
readonly downloadProgress: Progress;
readonly uploadProgress: Progress;
}

/**
* Type of a function called when an HTTP redirect takes place. It is allowed to mutate the `updatedRequest` argument.
*
* @param redirectResponse Response data (without a body) describing the redirect response.
* @param updatedRequest The URL (optional) and headers of the updated request; the handler may modify this object in place.
*/
export type RedirectHandler = (
redirectResponse: BaseHttpResponseData,
updatedRequest: { url?: string | URL; headers: SimpleHeaders },
) => void;

/**
* Interface for user-defined HTTP clients to be used for plain HTTP crawling and for sending additional requests during a crawl.
*/
export interface BaseHttpClient {
/**
* Perform an HTTP Request and return the complete response.
*
* @typeParam TResponseType Key of {@apilink ResponseTypes} selecting the type of the response body; defaults to `'text'`.
* @param request The request to perform.
* @returns A promise of the response, including its body.
*/
sendRequest<TResponseType extends keyof ResponseTypes = 'text'>(
request: HttpRequest<TResponseType>,
): Promise<HttpResponse<TResponseType>>;

/**
* Perform an HTTP Request and return after the response headers are received. The body may be read from a stream contained in the response.
*
* @param request The request to perform.
* @param onRedirect Optional {@apilink RedirectHandler} to be called when an HTTP redirect takes place.
* @returns A promise of the response, whose `stream` property carries the body.
*/
stream(request: HttpRequest, onRedirect?: RedirectHandler): Promise<StreamingHttpResponse>;
}

/**
* Converts {@apilink HttpRequestOptions} to a {@apilink HttpRequest}.
*/
export function processHttpRequestOptions<TResponseType extends keyof ResponseTypes = 'text'>({
searchParams,
form,
Expand Down
9 changes: 9 additions & 0 deletions packages/core/src/http_clients/got_scraping_http_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ import type {
BaseHttpClient,
} from './base_http_client';

/**
* An HTTP client implementation based on the `got-scraping` library.
*/
export class GotScrapingHttpClient implements BaseHttpClient {
/**
* @inheritDoc
*/
async sendRequest<TResponseType extends keyof ResponseTypes>(
request: HttpRequest<TResponseType>,
): Promise<HttpResponse<TResponseType>> {
Expand All @@ -30,6 +36,9 @@ export class GotScrapingHttpClient implements BaseHttpClient {
};
}

/**
* @inheritDoc
*/
async stream(request: HttpRequest, handleRedirect?: RedirectHandler): Promise<StreamingHttpResponse> {
// eslint-disable-next-line no-async-promise-executor
return new Promise(async (resolve, reject) => {
Expand Down
6 changes: 6 additions & 0 deletions packages/utils/src/internals/url.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
/**
* Search (query string) parameters: a string, a `URLSearchParams` instance, or a plain record
* whose values are strings, numbers, booleans, `null` or `undefined`.
*/
export type SearchParams = string | URLSearchParams | Record<string, string | number | boolean | null | undefined>;

/**
* Appends search (query string) parameters to a URL, replacing the original value (if any).
*
* @param url The URL to append to.
* @param searchParams The search parameters to be appended.
*/
export function applySearchParams(url: URL, searchParams: SearchParams | undefined): void {
if (searchParams === undefined) {
return;
Expand Down

0 comments on commit 7f22ad5

Please sign in to comment.