From 1bb7712913040ef1a7c48c66d4f491abbccfd087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 23 May 2024 10:41:11 +0200 Subject: [PATCH] test: add e2e test for zero concurrency with RQ v2 --- .../actor/.actor/actor.json | 7 ++++ .../actor/.gitignore | 7 ++++ .../actor/Dockerfile | 16 +++++++++ .../actor/main.js | 33 +++++++++++++++++++ .../actor/package.json | 28 ++++++++++++++++ .../test.mjs | 12 +++++++ 6 files changed, 103 insertions(+) create mode 100644 test/e2e/request-queue-v2-zero-concurrency/actor/.actor/actor.json create mode 100644 test/e2e/request-queue-v2-zero-concurrency/actor/.gitignore create mode 100644 test/e2e/request-queue-v2-zero-concurrency/actor/Dockerfile create mode 100644 test/e2e/request-queue-v2-zero-concurrency/actor/main.js create mode 100644 test/e2e/request-queue-v2-zero-concurrency/actor/package.json create mode 100644 test/e2e/request-queue-v2-zero-concurrency/test.mjs diff --git a/test/e2e/request-queue-v2-zero-concurrency/actor/.actor/actor.json b/test/e2e/request-queue-v2-zero-concurrency/actor/.actor/actor.json new file mode 100644 index 000000000000..959ec7eaa2b0 --- /dev/null +++ b/test/e2e/request-queue-v2-zero-concurrency/actor/.actor/actor.json @@ -0,0 +1,7 @@ +{ + "actorSpecification": 1, + "name": "test-request-queue-zero-concurrency", + "version": "0.0", + "buildTag": "latest", + "env": null +} diff --git a/test/e2e/request-queue-v2-zero-concurrency/actor/.gitignore b/test/e2e/request-queue-v2-zero-concurrency/actor/.gitignore new file mode 100644 index 000000000000..ced7cbfc582d --- /dev/null +++ b/test/e2e/request-queue-v2-zero-concurrency/actor/.gitignore @@ -0,0 +1,7 @@ +.idea +.DS_Store +node_modules +package-lock.json +apify_storage +crawlee_storage +storage diff --git a/test/e2e/request-queue-v2-zero-concurrency/actor/Dockerfile b/test/e2e/request-queue-v2-zero-concurrency/actor/Dockerfile new file mode 100644 index 000000000000..36afd80b9648 --- /dev/null +++ 
b/test/e2e/request-queue-v2-zero-concurrency/actor/Dockerfile
@@ -0,0 +1,16 @@
+FROM apify/actor-node:20-beta
+
+COPY packages ./packages
+COPY package*.json ./
+
+RUN npm --quiet set progress=false \
+ && npm install --only=prod --no-optional --no-audit \
+ && npm update --no-audit \
+ && echo "Installed NPM packages:" \
+ && (npm list --only=prod --no-optional --all || true) \
+ && echo "Node.js version:" \
+ && node --version \
+ && echo "NPM version:" \
+ && npm --version
+
+COPY . ./
diff --git a/test/e2e/request-queue-v2-zero-concurrency/actor/main.js b/test/e2e/request-queue-v2-zero-concurrency/actor/main.js
new file mode 100644
index 000000000000..2cb940fa8990
--- /dev/null
+++ b/test/e2e/request-queue-v2-zero-concurrency/actor/main.js
@@ -0,0 +1,33 @@
+import { CheerioCrawler, log, RequestQueue } from '@crawlee/cheerio';
+import { Actor } from 'apify';
+
+log.setLevel(log.LEVELS.DEBUG); // log at DEBUG level for the whole run
+
+process.env.CRAWLEE_INTERNAL_TIMEOUT = '30000'; // presumably milliseconds; test.mjs expects a 30-35 s runtime - confirm
+
+const mainOptions = { // passed as the second argument to Actor.main() below
+    exit: Actor.isAtHome(),
+    storage: // local storage client only when STORAGE_IMPLEMENTATION is 'LOCAL', otherwise undefined
+        process.env.STORAGE_IMPLEMENTATION === 'LOCAL'
+            ? new (await import('@apify/storage-local')).ApifyStorageLocal()
+            : undefined,
+};
+
+// Scenario under test: RequestQueue auto-reset when it is stuck with requests in progress
+await Actor.main(async () => {
+    const requestQueue = await RequestQueue.open();
+    await requestQueue.addRequest({ url: 'https://example.com/?q=1' });
+    await requestQueue.addRequest({ url: 'https://example.com/?q=2' });
+    const r3 = await requestQueue.addRequest({ url: 'https://example.com/?q=3' });
+    // Trigger zero concurrency by manually marking the third request (r3) as already in progress
+    requestQueue.inProgress.add(r3.requestId);
+
+    const crawler = new CheerioCrawler({
+        requestQueue,
+        async requestHandler({ request }) {
+            log.info(request.id); // the handler only logs the id of each handled request
+        },
+    });
+
+    await crawler.run();
+}, mainOptions);
diff --git a/test/e2e/request-queue-v2-zero-concurrency/actor/package.json b/test/e2e/request-queue-v2-zero-concurrency/actor/package.json
new file mode 100644
index 000000000000..1f24f5ba20d6
--- /dev/null
+++ b/test/e2e/request-queue-v2-zero-concurrency/actor/package.json
@@ -0,0 +1,28 @@
+{
+    "name": "test-request-queue-zero-concurrency",
+    "version": "0.0.1",
+    "description": "Request Queue Test - Zero Concurrency",
+    "dependencies": {
+        "apify": "next",
+        "@apify/storage-local": "^2.1.3",
+        "@crawlee/basic": "file:./packages/basic-crawler",
+        "@crawlee/browser-pool": "file:./packages/browser-pool",
+        "@crawlee/http": "file:./packages/http-crawler",
+        "@crawlee/cheerio": "file:./packages/cheerio-crawler",
+        "@crawlee/core": "file:./packages/core",
+        "@crawlee/memory-storage": "file:./packages/memory-storage",
+        "@crawlee/types": "file:./packages/types",
+        "@crawlee/utils": "file:./packages/utils"
+    },
+    "overrides": {
+        "apify": {
+            "@crawlee/core": "file:./packages/core",
+            "@crawlee/utils": "file:./packages/utils"
+        }
+    },
+    "scripts": {
+        "start": "node main.js"
+    },
+    "type": "module",
+    "license": "ISC"
+}
diff --git a/test/e2e/request-queue-v2-zero-concurrency/test.mjs 
b/test/e2e/request-queue-v2-zero-concurrency/test.mjs
new file mode 100644
index 000000000000..42656d0ad0a0
--- /dev/null
+++ b/test/e2e/request-queue-v2-zero-concurrency/test.mjs
@@ -0,0 +1,12 @@
+import { initialize, getActorTestDir, runActor, expect } from '../tools.mjs';
+
+const testActorDirname = getActorTestDir(import.meta.url); // NOTE(review): presumably the sibling actor/ directory - confirm in tools.mjs
+await initialize(testActorDirname);
+
+const { stats } = await runActor(testActorDirname); // run the actor and keep only the `stats` field of the result
+
+await expect(stats.requestsFinished === 3, 'All requests finished'); // actor/main.js enqueues exactly three requests
+await expect(
+    stats.crawlerRuntimeMillis > 30 * 1e3 && stats.crawlerRuntimeMillis < 35 * 1e3, // strictly between 30 000 and 35 000; main.js sets CRAWLEE_INTERNAL_TIMEOUT to '30000'
+    'RequestQueue triggers auto-reset after being stuck with requests in progress',
+);