Skip to content

Commit

Permalink
Memory and redirect RFC updates to netscrape
Browse files Browse the repository at this point in the history
  • Loading branch information
russellsteadman authored Oct 12, 2023
1 parent 9fbd3e9 commit bf2cee7
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 20 deletions.
5 changes: 5 additions & 0 deletions packages/bot/src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ enum BotErrorType {
Delay,
RobotsTxt,
Configuration,
MemorySafety,
}

export class BotError extends Error implements Error {
Expand All @@ -21,3 +22,7 @@ export class DelayError extends BotError implements BotError {
/**
 * Bot error whose `type` field is set to `BotErrorType.Configuration`,
 * letting callers tell configuration problems apart from other bot errors.
 */
export class ConfigError extends BotError implements BotError {
type = BotErrorType.Configuration;
}

/**
 * Bot error whose `type` field is set to `BotErrorType.MemorySafety`.
 * Presumably raised when a response is too large to process safely
 * (e.g. an oversized robots.txt body) — confirm against the throwing call sites.
 */
export class MemoryError extends BotError implements BotError {
type = BotErrorType.MemorySafety;
}
6 changes: 6 additions & 0 deletions packages/bot/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class Bot {
shared: false,
immutableMinTimeToLive: 3600 * 1000,
},
maxRedirects: 5,
dnsCache: this.options.disableCaching ? false : this.dnsCachable,
...options?.overrides,
};
Expand Down Expand Up @@ -211,6 +212,11 @@ class Bot {
throw new Errors.RobotsRejection('Robots.txt server error');
}

if (Buffer.byteLength(robotsTxt.body) > 5e5) {
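// RFC 9309 2.5: a crawler's parsing limit must be at least 500 KiB (512,000 bytes).
// NOTE(review): the 5e5-byte threshold above is slightly below that minimum — confirm intent.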
throw new Errors.MemoryError('Robots.txt too large');
}

// Parse the robots.txt
this.robotsTxt[origin] = new RobotsTxt(robotsTxt.body);

Expand Down
2 changes: 2 additions & 0 deletions packages/bot/src/test/_server.util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ export async function startServer(
});
});

server.setMaxListeners(0);

return { server, requests, port };
}

Expand Down
31 changes: 11 additions & 20 deletions packages/bot/src/test/bot.spec.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
import test from 'ava';
import Bot from '../index.js';
import * as Errors from '../errors.js';
import { type Server } from 'http';
import { startServer, stopServer } from './_server.util.js';

// Holds the HTTP server started by a test so a shared hook can stop it afterwards;
// stays undefined for tests that never start a server.
let server: Server | undefined;

// Smoke test: a Bot built from just a name and version exposes makeRequest as a function.
test('Bot#constructor()', (it) => {
  const instance = new Bot({ name: 'Test', version: '0.1' });
  const requestKind = typeof instance.makeRequest;
  it.is(requestKind, 'function');
});

test('Bot disallow none', async (it) => {
const start = await startServer('User-agent: *\nDisallow:');
server = start.server;
it.teardown(async () => await stopServer(start.server));

const bot = new Bot({ name: 'Test', version: '0.1' });

Expand Down Expand Up @@ -51,7 +48,7 @@ test('Bot disallow none', async (it) => {

test('Bot allow all', async (it) => {
const start = await startServer('User-agent: *\nAllow: /');
server = start.server;
it.teardown(async () => await stopServer(start.server));

const bot = new Bot({ name: 'Test', version: '0.1' });

Expand All @@ -70,7 +67,7 @@ test('Bot allow all', async (it) => {

test('Bot disallow all', async (it) => {
const start = await startServer('User-agent: *\nDisallow: /');
server = start.server;
it.teardown(async () => await stopServer(start.server));

const bot = new Bot({ name: 'Test', version: '0.1' });

Expand Down Expand Up @@ -107,11 +104,11 @@ test('Bot disallow all', async (it) => {
);
});

test('Bot responds to 400 errors', async (it) => {
test('RFC 9309 2.3.1.3: Responds to 400 errors', async (it) => {
const start = await startServer('User-agent: *\nAllow: /', {
robots400: true,
});
server = start.server;
it.teardown(async () => await stopServer(start.server));

const bot = new Bot({ name: 'Test', version: '0.1' });

Expand All @@ -128,49 +125,43 @@ test('Bot responds to 400 errors', async (it) => {
it.is(req.body, 'C');
});

test('Bot responds to 500 errors', async (it) => {
test('RFC 9309 2.3.1.4: Responds to 500 errors', async (it) => {
const start = await startServer('User-agent: *\nAllow: /', {
robots500: true,
});
server = start.server;
it.teardown(async () => await stopServer(start.server));

const bot = new Bot({ name: 'Test', version: '0.1' });

await it.throwsAsync(
() => bot.makeRequest(`http://127.0.0.1:${start.port}/`),
{
instanceOf: Errors.RobotsRejection,
message: 'Request blocked by robots.txt',
message: 'Robots.txt server error',
},
);

await it.throwsAsync(
() => bot.makeRequest(`http://127.0.0.1:${start.port}/a`),
{
instanceOf: Errors.RobotsRejection,
message: 'Request blocked by robots.txt',
message: 'Robots.txt server error',
},
);

await it.throwsAsync(
() => bot.makeRequest(`http://127.0.0.1:${start.port}/a/b`),
{
instanceOf: Errors.RobotsRejection,
message: 'Request blocked by robots.txt',
message: 'Robots.txt server error',
},
);

await it.throwsAsync(
() => bot.makeRequest(`http://127.0.0.1:${start.port}/a/b/c`),
{
instanceOf: Errors.RobotsRejection,
message: 'Request blocked by robots.txt',
message: 'Robots.txt server error',
},
);
});

// After every test, shut down the server a test recorded in `server`, if any.
test.afterEach(async () => {
  if (!server) {
    return;
  }
  await stopServer(server);
});

0 comments on commit bf2cee7

Please sign in to comment.