reuse existing upload on duplicate request #8999

@@ -34,6 +34,7 @@ export interface IPageBulkExportJob {
   completedAt?: Date, // the date at which job was completed
   attachment?: Ref<IAttachment>,
   status: PageBulkExportJobStatus,
+  revisionListHash?: string, // Hash created from the list of revision IDs. Used to detect existing duplicate uploads.
 }
 
 export interface IPageBulkExportJobHasId extends IPageBulkExportJob, HasObjectId {}
@@ -21,6 +21,7 @@ const pageBulkExportJobSchema = new Schema<PageBulkExportJobDocument>({
   status: {
     type: String, enum: Object.values(PageBulkExportJobStatus), required: true, default: PageBulkExportJobStatus.initializing,
   },
+  revisionListHash: { type: String },
 }, { timestamps: true });
 
 export default getOrCreateModel<PageBulkExportJobDocument, PageBulkExportJobModel>('PageBulkExportJob', pageBulkExportJobSchema);
@@ -55,14 +55,24 @@ class PageBulkExportJobCronService extends CronService {
    */
   async deleteDownloadExpiredExportJobs() {
     const downloadExpirationSeconds = configManager.getConfig('crowi', 'app:bulkExportDownloadExpirationSeconds');
+    const thresholdDate = new Date(Date.now() - downloadExpirationSeconds * 1000);
     const downloadExpiredExportJobs = await PageBulkExportJob.find({
       status: PageBulkExportJobStatus.completed,
-      completedAt: { $lt: new Date(Date.now() - downloadExpirationSeconds * 1000) },
+      completedAt: { $lt: thresholdDate },
     });
 
     const cleanup = async(job: PageBulkExportJobDocument) => {
       await pageBulkExportService?.cleanUpExportJobResources(job);
-      await this.crowi.attachmentService?.removeAttachment(job.attachment);
+
+      const hasSameAttachmentAndDownloadNotExpired = await PageBulkExportJob.findOne({
+        attachment: job.attachment,
+        _id: { $ne: job._id },
+        completedAt: { $gte: thresholdDate },
+      });
+      if (hasSameAttachmentAndDownloadNotExpired == null) {
+        // delete the attachment only if no other export job with an unexpired download has re-used it
+        await this.crowi.attachmentService?.removeAttachment(job.attachment);
+      }
     };
 
     await this.cleanUpAndDeleteBulkExportJobs(downloadExpiredExportJobs, cleanup);
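Because completed jobs can now share a single attachment, the expired-job cleanup above must not delete an attachment that another, still-downloadable job references. A minimal standalone sketch of that guard, using simplified illustrative types rather than the actual GROWI models, shows the decision in isolation:

// Illustrative sketch only: decide whether an expired export job's attachment
// may be deleted, given the other jobs that might re-use the same attachment.
interface ExportJobLike {
  id: string;
  attachmentId: string;
  completedAt: Date;
}

function canDeleteAttachment(expiredJob: ExportJobLike, allJobs: ExportJobLike[], thresholdDate: Date): boolean {
  // keep the attachment while any *other* job re-uses it and its download has not expired yet
  const stillReferenced = allJobs.some(job => (
    job.id !== expiredJob.id
    && job.attachmentId === expiredJob.attachmentId
    && job.completedAt >= thresholdDate
  ));
  return !stillReferenced;
}

// example: the attachment is kept because job2 re-uses it and is still downloadable
const thresholdDate = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
const expiredJob = { id: 'job1', attachmentId: 'att1', completedAt: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000) };
const otherJob = { id: 'job2', attachmentId: 'att1', completedAt: new Date() };
console.log(canDeleteAttachment(expiredJob, [expiredJob, otherJob], thresholdDate)); // false

In the actual change the same check is expressed as the PageBulkExportJob.findOne query shown above, with _id: { $ne: job._id } and completedAt: { $gte: thresholdDate }.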
@@ -1,10 +1,11 @@
+import { createHash } from 'crypto';
 import fs from 'fs';
 import path from 'path';
 import { Writable } from 'stream';
 import { pipeline as pipelinePromise } from 'stream/promises';
 
 import {
-  getIdForRef, type IPage, isPopulated, SubscriptionStatusType,
+  getIdForRef, getIdStringForRef, type IPage, isPopulated, SubscriptionStatusType,
 } from '@growi/core';
 import { getParentPath, normalizePath } from '@growi/core/dist/utils/path-utils';
 import type { Archiver } from 'archiver';
@@ -110,7 +111,22 @@ class PageBulkExportService {
 
     if (pageBulkExportJob.status === PageBulkExportJobStatus.initializing) {
       await this.createPageSnapshots(user, pageBulkExportJob);
-      pageBulkExportJob.status = PageBulkExportJobStatus.exporting;
+
+      const duplicateExportJob = await PageBulkExportJob.findOne({
+        user: pageBulkExportJob.user,
+        page: pageBulkExportJob.page,
+        format: pageBulkExportJob.format,
+        status: PageBulkExportJobStatus.completed,
+        revisionListHash: pageBulkExportJob.revisionListHash,
+      });
+      if (duplicateExportJob != null) {
+        // if an upload with the exact same contents exists, re-use the same attachment of that upload
+        pageBulkExportJob.attachment = duplicateExportJob.attachment;
+        pageBulkExportJob.status = PageBulkExportJobStatus.completed;
+      }
+      else {
+        pageBulkExportJob.status = PageBulkExportJobStatus.exporting;
+      }
       await pageBulkExportJob.save();
     }
     if (pageBulkExportJob.status === PageBulkExportJobStatus.exporting) {
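The lookup above treats a previous export as a duplicate only when user, page, format, and revisionListHash all match and that job completed successfully; in that case the new job skips the exporting and upload stages entirely and points at the existing attachment. A sketch of the same criteria as a pure function, with illustrative types and names that are not the service's actual API:

// Illustrative sketch of the duplicate-detection criteria (not GROWI's actual types).
interface ExportJobKey {
  userId: string;
  pageId: string;
  format: string; // e.g. 'md' or 'pdf'
  revisionListHash?: string;
}

interface CompletedExportJob extends ExportJobKey {
  attachmentId: string;
}

// Returns the attachment of a completed job that exported exactly the same content, if one exists.
function findReusableAttachment(current: ExportJobKey, completedJobs: CompletedExportJob[]): string | undefined {
  if (current.revisionListHash == null) return undefined; // no hash yet, nothing to compare against
  return completedJobs.find(job => (
    job.userId === current.userId
    && job.pageId === current.pageId
    && job.format === current.format
    && job.revisionListHash === current.revisionListHash
  ))?.attachmentId;
}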
@@ -162,7 +178,8 @@
   }
 
   /**
-   * Create a snapshot for each page that is to be exported in the pageBulkExportJob
+   * Create a snapshot for each page that is to be exported in the pageBulkExportJob.
+   * Also calculate revisionListHash and save it to the pageBulkExportJob.
    */
   private async createPageSnapshots(user, pageBulkExportJob: PageBulkExportJobDocument): Promise<void> {
     // if the process of creating snapshots was interrupted, delete the snapshots and create from the start
@@ -173,6 +190,8 @@
       throw new Error('Base page not found');
     }
 
+    const revisionListHash = createHash('sha256');
+
     // create a Readable for pages to be exported
     const { PageQueryBuilder } = this.pageModel;
     const builder = await new PageQueryBuilder(this.pageModel.find())
@@ -188,6 +207,9 @@
       objectMode: true,
       write: async(page: PageDocument, encoding, callback) => {
         try {
+          if (page.revision != null) {
+            revisionListHash.update(getIdStringForRef(page.revision));
+          }
           await PageBulkExportPageSnapshot.create({
             pageBulkExportJob,
             path: page.path,
@@ -205,6 +227,9 @@
     this.pageBulkExportJobStreamManager.addJobStream(pageBulkExportJob._id, pagesReadable);
 
     await pipelinePromise(pagesReadable, pageSnapshotsWritable);
+
+    pageBulkExportJob.revisionListHash = revisionListHash.digest('hex');
+    await pageBulkExportJob.save();
   }
 
   /**
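The duplicate lookup relies on revisionListHash being deterministic: exporting the same page tree at the same revisions, in the same order, must always produce the same digest. A small standalone Node.js sketch of the hashing scheme used above (the helper name is assumed for illustration, this is not GROWI code):

import { createHash } from 'crypto';

// Hash an ordered list of revision IDs the way the snapshot stream does:
// feed each ID into one SHA-256 hash and take the hex digest at the end.
function hashRevisionList(revisionIds: string[]): string {
  const hash = createHash('sha256');
  for (const id of revisionIds) {
    hash.update(id);
  }
  return hash.digest('hex');
}

const revisions = ['6650f0c1a2b3c4d5e6f70001', '6650f0c1a2b3c4d5e6f70002'];

// same revisions in the same order -> same hash
console.log(hashRevisionList(revisions) === hashRevisionList([...revisions])); // true

// a different order (or any changed revision) -> different hash
console.log(hashRevisionList(revisions) === hashRevisionList([...revisions].reverse())); // false

Because the digest depends on iteration order, the page query feeding the snapshot stream has to enumerate pages in a stable order for the duplicate check to be reliable.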
@@ -267,7 +292,8 @@
     const pageArchiver = this.setUpPageArchiver();
     const bufferToPartSizeTransform = getBufferToFixedSizeTransform(this.maxPartSize);
 
-    const originalName = `${pageBulkExportJob._id}.${this.compressExtension}`;
+    if (pageBulkExportJob.revisionListHash == null) throw new Error('revisionListHash is not set');
+    const originalName = `${pageBulkExportJob.revisionListHash}.${this.compressExtension}`;
     const attachment = Attachment.createWithoutSave(null, user, originalName, this.compressExtension, 0, AttachmentType.PAGE_BULK_EXPORT);
     const uploadKey = `${FilePathOnStoragePrefix.pageBulkExport}/${attachment.fileName}`;