Skip to content

Commit

Permalink
fix(core): support relative links in enqueueLinks explicitly provided via `urls` option (#2014)
Browse files Browse the repository at this point in the history

Closes #2005
  • Loading branch information
B4nan authored Jul 27, 2023
1 parent 8c0928b commit cbd9d08
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
6 changes: 4 additions & 2 deletions packages/core/src/enqueue_links/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,19 +208,21 @@ export function filterRequestsByPatterns(requests: Request[], patterns?: UrlPatt
*/
export function createRequestOptions(
sources: (string | Record<string, unknown>)[],
options: Pick<EnqueueLinksOptions, 'label' | 'userData'> = {},
options: Pick<EnqueueLinksOptions, 'label' | 'userData' | 'baseUrl'> = {},
): RequestOptions[] {
return sources
.map((src) => (typeof src === 'string' ? { url: src } : src as unknown as RequestOptions))
.filter(({ url }) => {
try {
return new URL(url).href;
return new URL(url, options.baseUrl).href;
} catch (err) {
return false;
}
})
.map((requestOptions) => {
requestOptions.url = new URL(requestOptions.url, options.baseUrl).href;
requestOptions.userData ??= options.userData ?? {};

if (typeof options.label === 'string') {
requestOptions.userData = {
...requestOptions.userData,
Expand Down
23 changes: 23 additions & 0 deletions test/core/enqueue_links/enqueue_links.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,29 @@ describe('enqueueLinks()', () => {
expect(enqueued[2].userData).toEqual({});
});

// Root-relative entries passed via the `urls` option are expected to be resolved against
// `baseUrl` (dropping the `/removethis/` path of the base), not against `originalRequestUrl`.
test('correctly resolves relative URLs with `urls` option', async () => {
    const { enqueued, requestQueue } = createRequestQueueMock();

    // One expected absolute URL per relative entry in `urls`, in the same order.
    const expectedUrls = [
        'http://www.absolute.com/relative/url1',
        'http://www.absolute.com/relative/url2',
    ];

    await cheerioCrawlerEnqueueLinks({
        options: {
            baseUrl: 'http://www.absolute.com/removethis/',
            urls: ['/relative/url1', '/relative/url2'],
        },
        $,
        requestQueue,
        originalRequestUrl: 'https://example.com',
    });

    // Check the count first so the indexed lookups below are known to be valid.
    expect(enqueued).toHaveLength(2);

    expectedUrls.forEach((expectedUrl, index) => {
        const request = enqueued[index];

        expect(request.url).toBe(expectedUrl);
        expect(request.method).toBe('GET');
        // No `label` or `userData` was supplied, so the user data is expected to be empty.
        expect(request.userData).toEqual({});
    });
});

test('correctly works with transformRequestFunction', async () => {
const { enqueued, requestQueue } = createRequestQueueMock();
const pseudoUrls = [
Expand Down

0 comments on commit cbd9d08

Please sign in to comment.