From 976bac948955687848d2d90734e650aa89f25dd9 Mon Sep 17 00:00:00 2001 From: EllAchE <26192612+EllAchE@users.noreply.github.com> Date: Wed, 20 Mar 2024 00:56:38 -0700 Subject: [PATCH] fix decompression script etc --- package-lock.json | 146 ++++++++++++++++++++++++++++++++++--- package.json | 3 +- src/queue.ts | 29 ++++---- src/run_metrics_on_file.ts | 65 +++++++++-------- src/zst_decompressor.ts | 35 ++++----- 5 files changed, 208 insertions(+), 70 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3c5ef78..bdb7eed 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,8 @@ "chess.js": "^1.0.0-beta.6", "d3": "^7.8.5", "node-zstandard": "^1.2.4", - "proper-lockfile": "^4.1.2" + "proper-lockfile": "^4.1.2", + "ts-node": "10.9.2" }, "devDependencies": { "@babel/preset-typescript": "^7.23.0", @@ -810,6 +811,26 @@ "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", "dev": true }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@cspotcode/source-map-support/node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, "node_modules/@istanbuljs/load-nyc-config": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", @@ -1131,7 +1152,6 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz", "integrity": "sha512-dSYZh7HhCDtCKm4QakX0xFpsRDqjjtZf/kjI/v3T3Nwt5r8/qz/M19F9ySyOqU94SXBmeG9ttTul+YnR4LOxFA==", - "dev": true, "engines": { "node": ">=6.0.0" } @@ -1148,8 +1168,7 @@ "node_modules/@jridgewell/sourcemap-codec": { "version": "1.4.15", "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", - "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==", - "dev": true + "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==" }, "node_modules/@jridgewell/trace-mapping": { "version": "0.3.19", @@ -1185,6 +1204,26 @@ "@sinonjs/commons": "^3.0.0" } }, + "node_modules/@tsconfig/node10": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.9.tgz", + "integrity": "sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA==" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==" + }, "node_modules/@types/async": { "version": "3.2.24", "resolved": "https://registry.npmjs.org/@types/async/-/async-3.2.24.tgz", @@ -1543,8 +1582,7 @@ "node_modules/@types/node": { "version": "18.11.18", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.11.18.tgz", - "integrity": "sha512-DHQpWGjyQKSHj3ebjFI/wRKcqQcdR+MoFBygntYOZytCqNfkd2ZC4ARDJ2DQqhjH5p85Nnd3jhUJIXrszFX/JA==", - "dev": true + "integrity": "sha512-DHQpWGjyQKSHj3ebjFI/wRKcqQcdR+MoFBygntYOZytCqNfkd2ZC4ARDJ2DQqhjH5p85Nnd3jhUJIXrszFX/JA==" }, "node_modules/@types/proper-lockfile": { "version": "4.1.4", @@ -1582,6 +1620,25 @@ "integrity": "sha512-iO9ZQHkZxHn4mSakYV0vFHAVDyEOIJQrV2uZ06HxEPcx+mt8swXoZHIbaaJ2crJYFfErySgktuTZ3BeLz+XmFA==", "dev": true }, + "node_modules/acorn": { + "version": "8.11.3", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz", + "integrity": "sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.2.tgz", + "integrity": "sha512-cjkyv4OtNCIeqhHrfS81QWXoCBPExR/J62oyEqepVw8WaQeSqpW2uhuLPh1m9eWhDuOo/jUXVTlifvesOWp/4A==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/ansi-escapes": { "version": "4.3.2", "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", @@ -1634,6 +1691,11 @@ "node": ">= 8" } }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==" + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -2028,6 +2090,11 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==" + }, "node_modules/cross-spawn": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", @@ -2469,6 +2536,14 @@ "node": ">=8" } }, + "node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "engines": { + "node": ">=0.3.1" + } + }, "node_modules/diff-sequences": { "version": "29.6.3", "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", @@ -3729,8 +3804,7 @@ "node_modules/make-error": { "version": "1.3.6", "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", - "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", - "dev": true + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==" }, "node_modules/makeerror": { "version": "1.0.12", @@ -4443,6 +4517,48 @@ "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", "dev": true }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, "node_modules/tslib": { "version": "2.6.2", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", @@ -4473,7 +4589,6 @@ "version": "5.2.2", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz", "integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==", - "dev": true, "peer": true, "bin": { "tsc": "bin/tsc", @@ -4513,6 +4628,11 @@ "browserslist": ">= 4.21.0" } }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==" + }, "node_modules/v8-to-istanbul": { "version": "9.1.0", "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.1.0.tgz", @@ -4635,6 +4755,14 @@ "node": ">=12" } }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "engines": { + "node": ">=6" + } + }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", diff --git a/package.json b/package.json index 7a76d85..ecfe5b5 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,8 @@ "chess.js": "^1.0.0-beta.6", "d3": "^7.8.5", "node-zstandard": "^1.2.4", - "proper-lockfile": "^4.1.2" + "proper-lockfile": "^4.1.2", + "ts-node": "10.9.2" }, "devDependencies": { "@babel/preset-typescript": "^7.23.0", diff --git a/src/queue.ts b/src/queue.ts index 32a6b0a..22dbbf7 100644 --- a/src/queue.ts +++ b/src/queue.ts @@ -7,22 +7,25 @@ export const RESULTS_PATH = `${__dirname}/results.json`; function launchQueueServer() { // Create a write to result.json queue with a concurrency of 1 // Possibly the simplest fix would be to run this as a separate process, then we can enforce messages sent to this queue are processed in order - const queue = asyncLib.queue((task) => { + const queue = asyncLib.queue((task, callback) => { console.log('received task', task.analysisKey); - return new Promise((resolve, reject) => { - const { results, analysisKey } = task; - try { - fs.writeFileSync(RESULTS_PATH, JSON.stringify(results, null, 2)); - console.log( - `Analysis "${analysisKey}" has been written to ${RESULTS_PATH}` - ); - resolve(); - } catch (err) { - reject(err); - } - }); + // return new Promise((resolve, reject) => { + const { results, analysisKey } = task; + try { + fs.writeFileSync(RESULTS_PATH, JSON.stringify(results, null, 2)); + console.log( + `Analysis "${analysisKey}" has been written to ${RESULTS_PATH}` + ); + } catch (err) { + console.error('Error writing to results.json', err); + } + // }); }, 1); + queue.drain(function () { + console.log('all items have been processed'); + }); + // this event listener receives tasks from the parallel processes const server = net.createServer((socket) => { socket.on('data', (data) => { diff --git a/src/run_metrics_on_file.ts b/src/run_metrics_on_file.ts index aeba37e..c2f96bf 100644 --- a/src/run_metrics_on_file.ts +++ b/src/run_metrics_on_file.ts @@ -24,20 +24,47 @@ import { RESULTS_PATH } from './queue'; */ export async function main(path: string) { console.time('Total Execution Time'); - await gameIterator(path); + await gameIterator(path, { 'Number of games analyzed': 0 }); console.timeEnd('Total Execution Time'); - return results; -} -let results = { - 'Number of games analyzed': 0, -}; + const now = new Date(); + const milliseconds = now.getMilliseconds(); + + const analysisKey = `analysis_${now + .toLocaleString() + .replace(/\/|,|:|\s/g, '_')}_${milliseconds}`; + + let existingResults = {}; + if (fs.existsSync(RESULTS_PATH)) { + const fileContent = fs.readFileSync(RESULTS_PATH, 'utf8'); + if (fileContent !== '') { + existingResults = JSON.parse(fileContent); + } + } + + console.log('sending results'); + + // TODO: Probably we need to read in the existing results in the queue server and merge them, as when there are multiple items in the queue + // this is going to be out of date + existingResults[analysisKey] = { + 'Number of games analyzed': 0, + }; + + const client = net.createConnection({ port: 8000 }); + + console.log('connected to queue server'); + + // Send the task to the queue server + client.write(JSON.stringify({ results: existingResults, analysisKey })); + + console.log('results sent'); +} /** * Metric functions will ingest a single game at a time * @param metricFunctions */ -async function gameIterator(path) { +async function gameIterator(path, results) { const cjsmin = new Chess(); const gamesGenerator = gameChunks(path); @@ -74,27 +101,5 @@ async function gameIterator(path) { // for use with zst_decompresser.js if (require.main === module) { - main(process.argv[2]).then((results) => { - const now = new Date(); - const milliseconds = now.getMilliseconds(); - - const analysisKey = `analysis_${now - .toLocaleString() - .replace(/\/|,|:|\s/g, '_')}_${milliseconds}`; - - let existingResults = {}; - if (fs.existsSync(RESULTS_PATH)) { - const fileContent = fs.readFileSync(RESULTS_PATH, 'utf8'); - if (fileContent !== '') { - existingResults = JSON.parse(fileContent); - } - } - - existingResults[analysisKey] = results; - - const client = net.createConnection({ port: 8000 }); - - // Send the task to the queue server - client.write(JSON.stringify({ results: existingResults, analysisKey })); - }); + main(process.argv[2]).then((results) => {}); } diff --git a/src/zst_decompressor.ts b/src/zst_decompressor.ts index a267602..f8796b9 100644 --- a/src/zst_decompressor.ts +++ b/src/zst_decompressor.ts @@ -9,7 +9,7 @@ const { spawn } = require('child_process'); // 30 games = 10*1024 bytes, 1 game = 350 bytes, 1000 games = 330KB, 100K games = 33MB // 10MB yields around 30k games, 5GB = around 15 million games // const SIZE_LIMIT = 30 * 1024 * 1024; // 30MB -const SIZE_LIMIT = 30 * 1024 * 1024; // 0.5MB, for testing +const SIZE_LIMIT = 0.1 * 1024 * 1024; // 0.5MB, for testing // set the total size limit of the combined decompressed files (this is how much space you need to have available on your PC prior to running node src/streaming_partial_decompresser.js) const decompressedSizeLimit = 500 * 1024 * 1024 * 1024; // 500 GB represented in bytes @@ -32,11 +32,10 @@ async function runAnalysis(filePath: string) { // Run the analysis script console.log(`Running analysis script on ${filePath}...`); - const analysisFileBasePath = path.resolve(__dirname, '..', 'src'); + const analysisFileBasePath = path.resolve(__dirname, '..', '..', 'src'); const child = spawn('ts-node', [ - // '/Users/bennyrubanov/Coding_Projects/chessanalysis/src/index_with_decompressor.ts', - `${analysisFileBasePath}/run_metrics_on_input.ts`, + `${analysisFileBasePath}/run_metrics_on_file.ts`, filePath, ]); @@ -82,7 +81,7 @@ const decompressAndAnalyze = async (file, start = 0) => { let these_chunks_counter = 0; // Initialize the chunk counter let file_counter = 1; // Initialize the file counter let total_chunk_counter = 0; - const filesProduced = new Set(); + const filesProduced = new Set(); // const base_path = `/Users/bennyrubanov/Coding_Projects/chessanalysis/data/${file.replace( // base_path used to enumerate where new files should go @@ -193,23 +192,25 @@ const decompressAndAnalyze = async (file, start = 0) => { }); result.on('end', async () => { - // When all data is decompressed, run the analysis on the last file - let lastAnalysisPromise = runAnalysis(newFilePath) - .then(() => { - if (fs.existsSync(newFilePath)) { - fs.unlinkSync(newFilePath); - console.log(`File ${newFilePath} has been deleted.`); - } - }) - .catch(console.error); + // When all data is decompressed, run the analysis on the produced files concurrently + for (const file of Array.from(filesProduced).slice(0, 5)) { + // TODO: this won't work out of the box for a large number of files as there is no max concurrency. But the sample only produces 4 decompressed files + // I'm slicing to test this with a smaller number of files - analysisPromises.push(lastAnalysisPromise); - filesBeingAnalyzed.add(newFilePath); + analysisPromises.push(runAnalysis(file)); + filesBeingAnalyzed.add(newFilePath); + } - // When all analyses are done, delete the files + // When all analyses are done, delete the files from the set Promise.allSettled(analysisPromises) .then(() => { console.log('All analyses completed'); + for (const file of filesBeingAnalyzed) { + if (fs.existsSync(file)) { + fs.unlinkSync(file); + console.log(`File ${file} has been deleted.`); + } + } filesBeingAnalyzed.clear(); }) .catch(console.error);