Skip to content

Commit

Permalink
refactor: make purgeDefaultStorages accept options with `onlyPurg…
Browse files Browse the repository at this point in the history
…eOnce` (#2044)

Adds an `onlyPurgeOnce` option to `purgeDefaultStorages` so that storages
can be purged multiple times. For now, it preserves backwards-compatible
behaviour, and all callers in crawlee explicitly set it to `true`.
  • Loading branch information
foxt451 authored Aug 24, 2023
1 parent d940a59 commit 8d6946c
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 7 deletions.
2 changes: 1 addition & 1 deletion packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
this.stats.reset();
await this.stats.resetStore();

await purgeDefaultStorages();
await purgeDefaultStorages({ onlyPurgeOnce: true });

if (requests) {
await this.addRequests(requests, options);
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/storages/dataset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ export class Dataset<Data extends Dictionary = Dictionary> {
options.config ??= Configuration.getGlobalConfig();
options.storageClient ??= options.config.getStorageClient();

await purgeDefaultStorages(options.config, options.storageClient);
await purgeDefaultStorages({ onlyPurgeOnce: true, client: options.storageClient, config: options.config });

const manager = StorageManager.getManager<Dataset<Data>>(this, options.config);

Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/storages/key_value_store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ export class KeyValueStore {
options.config ??= Configuration.getGlobalConfig();
options.storageClient ??= options.config.getStorageClient();

await purgeDefaultStorages(options.config, options.storageClient);
await purgeDefaultStorages({ onlyPurgeOnce: true, client: options.storageClient, config: options.config });

const manager = StorageManager.getManager(this, options.config);

Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/storages/request_list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ export class RequestList {
}

this.isLoading = true;
await purgeDefaultStorages();
await purgeDefaultStorages({ onlyPurgeOnce: true });

const [state, persistedRequests] = await this._loadStateAndPersistedRequests();

Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/storages/request_queue.ts
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ export class RequestQueue {
options.config ??= Configuration.getGlobalConfig();
options.storageClient ??= options.config.getStorageClient();

await purgeDefaultStorages(options.config, options.storageClient);
await purgeDefaultStorages({ onlyPurgeOnce: true, client: options.storageClient, config: options.config });

const manager = StorageManager.getManager(this, options.config);
const queue = await manager.openStorage(queueIdOrName, options.storageClient);
Expand Down
39 changes: 37 additions & 2 deletions packages/core/src/storages/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ import type { Dictionary, StorageClient } from '@crawlee/types';
import { KeyValueStore } from './key_value_store';
import { Configuration } from '../configuration';

/**
 * Options bag accepted by `purgeDefaultStorages`.
 * Every field is optional; omitted fields fall back to the defaults described below.
 */
interface PurgeDefaultStorageOptions {
    /**
     * Configuration whose `purgeOnStart` setting is consulted.
     * Falls back to `Configuration.getGlobalConfig()` when omitted.
     */
    config?: Configuration;

    /**
     * Storage client whose optional `purge` method gets invoked.
     * Falls back to the storage client of the resolved `config` when omitted.
     */
    client?: StorageClient;

    /**
     * When `true` (the default), the client is purged only if `purgeOnStart` is enabled
     * and that client has not been purged before. When `false`, the client is purged
     * on every call.
     */
    onlyPurgeOnce?: boolean;
}

/**
* Cleans up the local storage folder (defaults to `./storage`) created when running code locally.
* Purging will remove all the files in all storages except for INPUT.json in the default KV store.
Expand All @@ -16,10 +22,39 @@ import { Configuration } from '../configuration';
* this method will make sure the storage is purged only once for a given execution context, so it is safe to call
* it multiple times.
*/
export async function purgeDefaultStorages(config = Configuration.getGlobalConfig(), client: StorageClient = config.getStorageClient()) {
export async function purgeDefaultStorages(config?: Configuration, client?: StorageClient): Promise<void>;
/**
* Cleans up the local storage folder (defaults to `./storage`) created when running code locally.
* Purging will remove all the files in all storages except for INPUT.json in the default KV store.
*
* Purging of storages is happening automatically when we run our crawler (or when we open some storage
* explicitly, e.g. via `RequestList.open()`). We can disable that via `purgeOnStart` {@apilink Configuration}
* option or by setting `CRAWLEE_PURGE_ON_START` environment variable to `0` or `false`.
*
* This is a shortcut for running (optional) `purge` method on the StorageClient interface, in other words
* it will call the `purge` method of the underlying storage implementation we are currently using. In addition,
* this method will make sure the storage is purged only once for a given execution context, so it is safe to call
* it multiple times, unless you set `onlyPurgeOnce` to `false` in the `options` object.
*/
export async function purgeDefaultStorages(options?: PurgeDefaultStorageOptions): Promise<void>;
export async function purgeDefaultStorages(
configOrOptions?: Configuration | PurgeDefaultStorageOptions,
client?: StorageClient,
) {
const options: PurgeDefaultStorageOptions = configOrOptions instanceof Configuration ? {
client,
config: configOrOptions,
} : configOrOptions ?? {};
const {
config = Configuration.getGlobalConfig(),
onlyPurgeOnce = true,
} = options;
({ client = config.getStorageClient() } = options);

const casted = client as StorageClient & { __purged?: boolean };

if (config.get('purgeOnStart') && !casted.__purged) {
// if `onlyPurgeOnce` is false, will purge anytime this function is called, otherwise - only once, and only when `purgeOnStart` is enabled
if (!onlyPurgeOnce || (config.get('purgeOnStart') && !casted.__purged)) {
casted.__purged = true;
await casted.purge?.();
}
Expand Down

0 comments on commit 8d6946c

Please sign in to comment.