Skip to content

Commit

Permalink
Merge pull request #8999 from weseek/feat/147526-151412-reuse-upload-…
Browse files Browse the repository at this point in the history
…on-duplicate-request

reuse existing upload on duplicate request
  • Loading branch information
arafubeatbox committed Aug 26, 2024
2 parents 1c39b00 + 90d15fe commit 1e65f0d
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export interface IPageBulkExportJob {
completedAt?: Date, // the date at which job was completed
attachment?: Ref<IAttachment>,
status: PageBulkExportJobStatus,
revisionListHash?: string, // Hash created from the list of revision IDs. Used to detect existing duplicate uploads.
}

export interface IPageBulkExportJobHasId extends IPageBulkExportJob, HasObjectId {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const pageBulkExportJobSchema = new Schema<PageBulkExportJobDocument>({
status: {
type: String, enum: Object.values(PageBulkExportJobStatus), required: true, default: PageBulkExportJobStatus.initializing,
},
revisionListHash: { type: String },
}, { timestamps: true });

export default getOrCreateModel<PageBulkExportJobDocument, PageBulkExportJobModel>('PageBulkExportJob', pageBulkExportJobSchema);
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,24 @@ class PageBulkExportJobCronService extends CronService {
*/
async deleteDownloadExpiredExportJobs() {
const downloadExpirationSeconds = configManager.getConfig('crowi', 'app:bulkExportDownloadExpirationSeconds');
const thresholdDate = new Date(Date.now() - downloadExpirationSeconds * 1000);
const downloadExpiredExportJobs = await PageBulkExportJob.find({
status: PageBulkExportJobStatus.completed,
completedAt: { $lt: new Date(Date.now() - downloadExpirationSeconds * 1000) },
completedAt: { $lt: thresholdDate },
});

const cleanup = async(job: PageBulkExportJobDocument) => {
await pageBulkExportService?.cleanUpExportJobResources(job);
await this.crowi.attachmentService?.removeAttachment(job.attachment);

const hasSameAttachmentAndDownloadNotExpired = await PageBulkExportJob.findOne({
attachment: job.attachment,
_id: { $ne: job._id },
completedAt: { $gte: thresholdDate },
});
if (hasSameAttachmentAndDownloadNotExpired == null) {
// delete attachment if no other export job (whose download has not expired) has re-used it
await this.crowi.attachmentService?.removeAttachment(job.attachment);
}
};

await this.cleanUpAndDeleteBulkExportJobs(downloadExpiredExportJobs, cleanup);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import { createHash } from 'crypto';
import fs from 'fs';
import path from 'path';
import { Writable } from 'stream';
import { pipeline as pipelinePromise } from 'stream/promises';

import {
getIdForRef, type IPage, isPopulated, SubscriptionStatusType,
getIdForRef, getIdStringForRef, type IPage, isPopulated, SubscriptionStatusType,
} from '@growi/core';
import { getParentPath, normalizePath } from '@growi/core/dist/utils/path-utils';
import type { Archiver } from 'archiver';
Expand Down Expand Up @@ -110,7 +111,22 @@ class PageBulkExportService {

if (pageBulkExportJob.status === PageBulkExportJobStatus.initializing) {
await this.createPageSnapshots(user, pageBulkExportJob);
pageBulkExportJob.status = PageBulkExportJobStatus.exporting;

const duplicateExportJob = await PageBulkExportJob.findOne({
user: pageBulkExportJob.user,
page: pageBulkExportJob.page,
format: pageBulkExportJob.format,
status: PageBulkExportJobStatus.completed,
revisionListHash: pageBulkExportJob.revisionListHash,
});
if (duplicateExportJob != null) {
// if an upload with the exact same contents exists, re-use the same attachment of that upload
pageBulkExportJob.attachment = duplicateExportJob.attachment;
pageBulkExportJob.status = PageBulkExportJobStatus.completed;
}
else {
pageBulkExportJob.status = PageBulkExportJobStatus.exporting;
}
await pageBulkExportJob.save();
}
if (pageBulkExportJob.status === PageBulkExportJobStatus.exporting) {
Expand Down Expand Up @@ -162,7 +178,8 @@ class PageBulkExportService {
}

/**
* Create a snapshot for each page that is to be exported in the pageBulkExportJob
* Create a snapshot for each page that is to be exported in the pageBulkExportJob.
* Also calculate revisionListHash and save it to the pageBulkExportJob.
*/
private async createPageSnapshots(user, pageBulkExportJob: PageBulkExportJobDocument): Promise<void> {
// if the process of creating snapshots was interrupted, delete the snapshots and create from the start
Expand All @@ -173,6 +190,8 @@ class PageBulkExportService {
throw new Error('Base page not found');
}

const revisionListHash = createHash('sha256');

// create a Readable for pages to be exported
const { PageQueryBuilder } = this.pageModel;
const builder = await new PageQueryBuilder(this.pageModel.find())
Expand All @@ -188,6 +207,9 @@ class PageBulkExportService {
objectMode: true,
write: async(page: PageDocument, encoding, callback) => {
try {
if (page.revision != null) {
revisionListHash.update(getIdStringForRef(page.revision));
}
await PageBulkExportPageSnapshot.create({
pageBulkExportJob,
path: page.path,
Expand All @@ -205,6 +227,9 @@ class PageBulkExportService {
this.pageBulkExportJobStreamManager.addJobStream(pageBulkExportJob._id, pagesReadable);

await pipelinePromise(pagesReadable, pageSnapshotsWritable);

pageBulkExportJob.revisionListHash = revisionListHash.digest('hex');
await pageBulkExportJob.save();
}

/**
Expand Down Expand Up @@ -267,7 +292,8 @@ class PageBulkExportService {
const pageArchiver = this.setUpPageArchiver();
const bufferToPartSizeTransform = getBufferToFixedSizeTransform(this.maxPartSize);

const originalName = `${pageBulkExportJob._id}.${this.compressExtension}`;
if (pageBulkExportJob.revisionListHash == null) throw new Error('revisionListHash is not set');
const originalName = `${pageBulkExportJob.revisionListHash}.${this.compressExtension}`;
const attachment = Attachment.createWithoutSave(null, user, originalName, this.compressExtension, 0, AttachmentType.PAGE_BULK_EXPORT);
const uploadKey = `${FilePathOnStoragePrefix.pageBulkExport}/${attachment.fileName}`;

Expand Down

0 comments on commit 1e65f0d

Please sign in to comment.