From a55780761d2bf464c4ed495c5cdd39f3a7f7d905 Mon Sep 17 00:00:00 2001
From: John Darrington
Date: Thu, 9 Jun 2022 08:52:57 -0600
Subject: [PATCH] fixed duplicate nodes

---
 src/data_processing/process.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/data_processing/process.ts b/src/data_processing/process.ts
index 452e12370..dcb3aeffb 100644
--- a/src/data_processing/process.ts
+++ b/src/data_processing/process.ts
@@ -87,7 +87,7 @@ export async function ProcessData(staging: DataStaging): Promise
         Logger.error(`unable to fetch files for data staging records ${stagingFiles.error?.error}`);
     }
 
-    const nodesToInsert: Node[] = [];
+    let nodesToInsert: Node[] = [];
     const edgesToInsert: Edge[] = [];
     const timeseriesToInsert: TimeseriesEntry[] = [];
 
@@ -115,6 +115,11 @@ export async function ProcessData(staging: DataStaging): Promise
         if (IsTimeseries(results.value)) timeseriesToInsert.push(...results.value);
     }
 
+    // we must deduplicate nodes based on original ID in order to avoid a database transaction error. We toss out the
+    // duplicates because even if we inserted them they would either overwrite, or be overwritten by, the original. Users
+    // should be aware that if their import generates records with the same original ID, only one instance will be inserted
+    nodesToInsert = nodesToInsert.filter((value, index, self) => index === self.findIndex((t) => t.original_data_id === value.original_data_id));
+
     // insert all nodes and files
     if (nodesToInsert.length > 0) {
        const inserted = await nodeRepository.bulkSave(staging.data_source_id!, nodesToInsert, transaction.value);
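
Note: the filter/findIndex call added above keeps the first node seen for each original ID, but it rescans the array for every element, which is quadratic in the number of nodes. Below is a minimal standalone sketch of an equivalent single-pass version using a Set. The simplified Node interface and the dedupeByOriginalId helper are illustrative assumptions only and are not part of this patch or the surrounding codebase.

// Hypothetical, simplified Node shape for illustration; the real Node class has more fields.
interface Node {
    original_data_id: string;
}

// Keep only the first node seen for each original_data_id.
// Behaviorally equivalent to the filter/findIndex in the patch, but a single O(n) pass.
function dedupeByOriginalId(nodes: Node[]): Node[] {
    const seen = new Set<string>();
    const result: Node[] = [];

    for (const node of nodes) {
        if (seen.has(node.original_data_id)) continue;
        seen.add(node.original_data_id);
        result.push(node);
    }

    return result;
}

// hypothetical usage at the same point in ProcessData:
// nodesToInsert = dedupeByOriginalId(nodesToInsert);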