diff --git a/packages/core/src/enqueue_links/shared.ts b/packages/core/src/enqueue_links/shared.ts index 5223a5d6f541..bebf020794db 100644 --- a/packages/core/src/enqueue_links/shared.ts +++ b/packages/core/src/enqueue_links/shared.ts @@ -208,19 +208,21 @@ export function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatt */ export function createRequestOptions( sources: (string | Record)[], - options: Pick = {}, + options: Pick = {}, ): RequestOptions[] { return sources .map((src) => (typeof src === 'string' ? { url: src } : src as unknown as RequestOptions)) .filter(({ url }) => { try { - return new URL(url).href; + return new URL(url, options.baseUrl).href; } catch (err) { return false; } }) .map((requestOptions) => { + requestOptions.url = new URL(requestOptions.url, options.baseUrl).href; requestOptions.userData ??= options.userData ?? {}; + if (typeof options.label === 'string') { requestOptions.userData = { ...requestOptions.userData, diff --git a/test/core/enqueue_links/enqueue_links.test.ts b/test/core/enqueue_links/enqueue_links.test.ts index 0d71fac02cda..04274d15deec 100644 --- a/test/core/enqueue_links/enqueue_links.test.ts +++ b/test/core/enqueue_links/enqueue_links.test.ts @@ -853,6 +853,29 @@ describe('enqueueLinks()', () => { expect(enqueued[2].userData).toEqual({}); }); + test('correctly resolves relative URLs with `urls` option', async () => { + const { enqueued, requestQueue } = createRequestQueueMock(); + await cheerioCrawlerEnqueueLinks({ + options: { + baseUrl: 'http://www.absolute.com/removethis/', + urls: ['/relative/url1', '/relative/url2'], + }, + $, + requestQueue, + originalRequestUrl: 'https://example.com', + }); + + expect(enqueued).toHaveLength(2); + + expect(enqueued[0].url).toBe('http://www.absolute.com/relative/url1'); + expect(enqueued[0].method).toBe('GET'); + expect(enqueued[0].userData).toEqual({}); + + expect(enqueued[1].url).toBe('http://www.absolute.com/relative/url2'); + expect(enqueued[1].method).toBe('GET'); + expect(enqueued[1].userData).toEqual({}); + }); + test('correctly works with transformRequestFunction', async () => { const { enqueued, requestQueue } = createRequestQueueMock(); const pseudoUrls = [