From 1106d3aeccd9d1aca8b2630d720d3ea6a1c955f6 Mon Sep 17 00:00:00 2001 From: "Kaffi Y." Date: Mon, 30 Sep 2024 17:57:49 +0800 Subject: [PATCH] fix(http-crawler): avoid crashing when gotOptions.cache is on (#2686) When the response is from cache, http-crawler will throw "TypeError: Cannot convert undefined or null to object" This PR fixes this issue. Related usage is added as test case as well. --- .../src/internals/http-crawler.ts | 4 +-- test/core/crawlers/http_crawler.test.ts | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/packages/http-crawler/src/internals/http-crawler.ts b/packages/http-crawler/src/internals/http-crawler.ts index 79bd77a8f1cc..883fb3bec4fd 100644 --- a/packages/http-crawler/src/internals/http-crawler.ts +++ b/packages/http-crawler/src/internals/http-crawler.ts @@ -977,9 +977,9 @@ function addResponsePropertiesToStream(stream: GotRequest) { response.on('end', () => { // @ts-expect-error - Object.assign(stream.rawTrailers, response.rawTrailers); + if (stream.rawTrailers) Object.assign(stream.rawTrailers, response.rawTrailers); // @ts-expect-error - Object.assign(stream.trailers, response.trailers); + if (stream.trailers) Object.assign(stream.trailers, response.trailers); // @ts-expect-error stream.complete = response.complete; diff --git a/test/core/crawlers/http_crawler.test.ts b/test/core/crawlers/http_crawler.test.ts index a639722fac50..296235181056 100644 --- a/test/core/crawlers/http_crawler.test.ts +++ b/test/core/crawlers/http_crawler.test.ts @@ -374,3 +374,29 @@ test('should retry on 403 even with disallowed content-type', async () => { expect(succeeded).toHaveLength(1); expect(succeeded[0].retryCount).toBe(1); }); + +test('should work with cacheable-request', async () => { + const isFromCache: Record<string, boolean> = {}; + const cache = new Map(); + const crawler = new HttpCrawler({ + maxConcurrency: 1, + preNavigationHooks: [ + async (_, gotOptions) => { + gotOptions.cache = cache; + gotOptions.headers = { + 
...gotOptions.headers, + // to force cache + 'cache-control': 'max-stale', + }; + }, + ], + requestHandler: async ({ request, response }) => { + isFromCache[request.uniqueKey] = response.isFromCache; + }, + }); + await crawler.run([ + { url, uniqueKey: 'first' }, + { url, uniqueKey: 'second' }, + ]); + expect(isFromCache).toEqual({ first: false, second: true }); +});