Skip to content

Commit

Permalink
perf: optimize RequestList memory footprint (#2466)
Browse files Browse the repository at this point in the history
The request list now delays the conversion of the source items into the
`Request` objects, resulting in a significantly less memory footprint.

Related: https://apify.slack.com/archives/C0L33UM7Z/p1715109984834079
  • Loading branch information
B4nan authored May 15, 2024
1 parent 38c0942 commit 12210bd
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 18 deletions.
20 changes: 16 additions & 4 deletions packages/core/src/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ export class Request<UserData extends Dictionary = Dictionary> {
this.id = id;
this.url = url;
this.loadedUrl = loadedUrl;
this.uniqueKey = uniqueKey || this._computeUniqueKey({ url, method, payload, keepUrlFragment, useExtendedUniqueKey });
this.uniqueKey = uniqueKey || Request.computeUniqueKey({ url, method, payload, keepUrlFragment, useExtendedUniqueKey });
this.method = method;
this.payload = payload;
this.noRetry = noRetry;
Expand Down Expand Up @@ -378,7 +378,18 @@ export class Request<UserData extends Dictionary = Dictionary> {
this.errorMessages.push(message);
}

protected _computeUniqueKey({ url, method, payload, keepUrlFragment, useExtendedUniqueKey }: ComputeUniqueKeyOptions) {
// TODO: only for better BC, remove in v4
protected _computeUniqueKey(options: ComputeUniqueKeyOptions) {
return Request.computeUniqueKey(options);
}

// TODO: only for better BC, remove in v4
protected _hashPayload(payload: BinaryLike): string {
return Request.hashPayload(payload);
}

/** @internal */
static computeUniqueKey({ url, method = 'GET', payload, keepUrlFragment = false, useExtendedUniqueKey = false }: ComputeUniqueKeyOptions) {
const normalizedMethod = method.toUpperCase();
const normalizedUrl = normalizeUrl(url, keepUrlFragment) || url; // It returns null when url is invalid, causing weird errors.
if (!useExtendedUniqueKey) {
Expand All @@ -390,11 +401,12 @@ export class Request<UserData extends Dictionary = Dictionary> {
}
return normalizedUrl;
}
const payloadHash = payload ? this._hashPayload(payload) : '';
const payloadHash = payload ? Request.hashPayload(payload) : '';
return `${normalizedMethod}(${payloadHash}):${normalizedUrl}`;
}

protected _hashPayload(payload: BinaryLike): string {
/** @internal */
static hashPayload(payload: BinaryLike): string {
return crypto
.createHash('sha256')
.update(payload)
Expand Down
33 changes: 22 additions & 11 deletions packages/core/src/storages/request_list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ import type { EventManager } from '../events';
import { EventType } from '../events';
import { log } from '../log';
import type { ProxyConfiguration } from '../proxy_configuration';
import type { InternalSource, RequestOptions, Source } from '../request';
import { Request } from '../request';
import { type InternalSource, type RequestOptions, Request, type Source } from '../request';
import { createDeserialize, serializeArray } from '../serialization';

/** @internal */
Expand Down Expand Up @@ -238,7 +237,7 @@ export class RequestList {
* All requests in the array have distinct uniqueKey!
* @internal
*/
requests: Request[] = [];
requests: (Request | RequestOptions)[] = [];

/** Index to the next item in requests array to fetch. All previous requests are either handled or in progress. */
private nextIndex = 0;
Expand Down Expand Up @@ -551,7 +550,7 @@ export class RequestList {
return {
nextIndex: this.nextIndex,
nextUniqueKey: this.nextIndex < this.requests.length
? this.requests[this.nextIndex].uniqueKey
? this.requests[this.nextIndex].uniqueKey!
: null,
inProgress: [...this.inProgress],
};
Expand Down Expand Up @@ -593,21 +592,31 @@ export class RequestList {
if (uniqueKey) {
this.reclaimed.delete(uniqueKey);
const index = this.uniqueKeyToIndex[uniqueKey];
return this.requests[index];
return this.ensureRequest(this.requests[index], index);
}

// Otherwise return next request.
if (this.nextIndex < this.requests.length) {
const request = this.requests[this.nextIndex];
this.inProgress.add(request.uniqueKey);
const index = this.nextIndex;
const request = this.requests[index];
this.inProgress.add(request.uniqueKey!);
this.nextIndex++;
this.isStatePersisted = false;
return request;
return this.ensureRequest(request, index);
}

return null;
}

private ensureRequest(requestLike: Request | RequestOptions, index: number): Request {
if (requestLike instanceof Request) {
return requestLike;
}

this.requests[index] = new Request(requestLike);
return this.requests[index] as Request;
}

/**
* Marks request as handled after successful processing.
*/
Expand Down Expand Up @@ -694,19 +703,21 @@ export class RequestList {
* of a `Request`, then the function creates a `Request` instance.
*/
protected _addRequest(source: RequestListSource) {
let request;
let request: Request | RequestOptions;
const type = typeof source;

if (type === 'string') {
request = new Request({ url: source as string });
request = { url: source as string };
} else if (source instanceof Request) {
request = source;
} else if (source && type === 'object') {
request = new Request(source as RequestOptions);
request = source as RequestOptions;
} else {
throw new Error(`Cannot create Request from type: ${type}`);
}

const hasUniqueKey = Reflect.has(Object(source), 'uniqueKey');
request.uniqueKey ??= Request.computeUniqueKey(request as any);

// Add index to uniqueKey if duplicates are to be kept
if (this.keepDuplicateUrls && !hasUniqueKey) {
Expand Down
6 changes: 3 additions & 3 deletions test/core/request_list.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ describe('RequestList', () => {
const name = 'xxx';
const SDK_KEY = `SDK_${name}`;
const sources = ['https://example.com'];
const requests = sources.map((url) => new Request({ url }));
const requests = sources.map((url) => ({ url, uniqueKey: url }));

const rl = await RequestList.open(name, sources);
expect(rl).toBeInstanceOf(RequestList);
Expand All @@ -752,7 +752,7 @@ describe('RequestList', () => {
const SDK_KEY = `SDK_${name}`;
let counter = 0;
const sources = [{ url: 'https://example.com' }];
const requests = sources.map(({ url }) => new Request({ url, uniqueKey: `${url}-${counter++}` }));
const requests = sources.map(({ url }) => ({ url, uniqueKey: `${url}-${counter++}` }));
const options = {
keepDuplicateUrls: true,
persistStateKey: 'yyy',
Expand Down Expand Up @@ -780,7 +780,7 @@ describe('RequestList', () => {

const name: string = null;
const sources = [{ url: 'https://example.com' }];
const requests = sources.map(({ url }) => new Request({ url }));
const requests = sources.map(({ url }) => ({ url, uniqueKey: url }));

const rl = await RequestList.open(name, sources);
expect(rl).toBeInstanceOf(RequestList);
Expand Down

0 comments on commit 12210bd

Please sign in to comment.