From 049f0519e3be2e7715fd809596c48c340b6b26db Mon Sep 17 00:00:00 2001
From: efiege <105237007+efiege@users.noreply.github.com>
Date: Mon, 24 Jul 2023 08:44:51 +0200
Subject: [PATCH] fix: auto delete old transfer-processes if their count
 exceeds a certain limit (#449)

Co-authored-by: Simon Engmann
Co-authored-by: Tim Berthold <75306992+tmberthold@users.noreply.github.com>
---
 .env                                          |  2 +-
 .../migration/DatabaseMigrationManager.java   | 10 ++++-
 .../V2__Delete-Transfer-Processes-Trigger.sql | 41 +++++++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 extensions/postgres-flyway/src/main/resources/migration/default/V2__Delete-Transfer-Processes-Trigger.sql

diff --git a/.env b/.env
index 0ec60c9f1..5f3a8c755 100644
--- a/.env
+++ b/.env
@@ -5,7 +5,7 @@ EDC_OAUTH_CLIENT_ID=override this via ENV vars
 # For docker-compose.yaml only:
 # Override images via ENV vars
 RELEASE_EDC_IMAGE=ghcr.io/sovity/edc-ce-mds:4.0.1
-RELEASE_EDC_UI_IMAGE=ghcr.io/sovity/edc-ui:0.0.1-milestone-8-sovity11
+RELEASE_EDC_UI_IMAGE=ghcr.io/sovity/edc-ui:0.0.1-milestone-8-sovity12
 
 # For docker-compose-dev.yaml only:
 # Override images via ENV vars
diff --git a/extensions/postgres-flyway/src/main/java/de/sovity/edc/extension/postgresql/migration/DatabaseMigrationManager.java b/extensions/postgres-flyway/src/main/java/de/sovity/edc/extension/postgresql/migration/DatabaseMigrationManager.java
index 3df089ebc..a4f81589c 100644
--- a/extensions/postgres-flyway/src/main/java/de/sovity/edc/extension/postgresql/migration/DatabaseMigrationManager.java
+++ b/extensions/postgres-flyway/src/main/java/de/sovity/edc/extension/postgresql/migration/DatabaseMigrationManager.java
@@ -20,6 +20,7 @@
 
 import java.util.Arrays;
 import java.util.List;
+import java.util.stream.Collectors;
 
 public class DatabaseMigrationManager {
     @Setting
@@ -45,9 +46,14 @@ public void migrateAllDataSources() {
 
     private List<String> getDataSourceNames(Config config) {
         var edcDatasourceConfig = config.getConfig(EDC_DATASOURCE_PREFIX);
-        return edcDatasourceConfig.partition().toList().stream()
+        var dataSourceNames = edcDatasourceConfig.partition().toList().stream()
                 .map(Config::currentNode)
-                .toList();
+                .collect(Collectors.toList());
+        // The default data source is always migrated last
+        if (dataSourceNames.remove(DEFAULT_DATASOURCE)) {
+            dataSourceNames.add(DEFAULT_DATASOURCE);
+        }
+        return dataSourceNames;
     }
 
     public List<String> getAdditionalFlywayMigrationLocations(String datasourceName) {
diff --git a/extensions/postgres-flyway/src/main/resources/migration/default/V2__Delete-Transfer-Processes-Trigger.sql b/extensions/postgres-flyway/src/main/resources/migration/default/V2__Delete-Transfer-Processes-Trigger.sql
new file mode 100644
index 000000000..5ca84691b
--- /dev/null
+++ b/extensions/postgres-flyway/src/main/resources/migration/default/V2__Delete-Transfer-Processes-Trigger.sql
@@ -0,0 +1,41 @@
+-- Required for reasonably fast ON DELETE CASCADE from edc_transfer_process
+create index data_request_transfer_process_id_idx
+    on edc_data_request (transfer_process_id);
+-- Speed up the sort + limit query
+-- Include transferprocess_id to enable an index-only scan
+create index transfer_process_created_at_idx
+    on edc_transfer_process (created_at) include (transferprocess_id);
+
+-- Delete the oldest rows when the table size exceeds 3000 rows
+-- The row count should mostly stabilize slightly above 3000, as the reltuples data in pg_class is only updated by VACUUM
+-- Unfortunately, I was not able to get conclusive results on the behavior under concurrent inserts
+-- One problem is that the table might still grow over time if concurrent inserts delete the same row
+-- To counter this, we delete two rows instead of just one
+-- The table then shrinks until the next auto-vacuum detects that it is below 3000 rows again
+create function transfer_process_delete_old_rows() returns trigger as $$
+begin
+    delete from edc_transfer_process o
+    using (
+        select i2.transferprocess_id
+        from edc_transfer_process i2
+        order by i2.created_at
+        limit 2
+    ) i,
+    (
+        -- Hack to avoid count(*), which takes several hundred milliseconds
+        -- Not perfectly accurate, but close enough
+        -- Idea taken from: https://www.cybertec-postgresql.com/en/postgresql-count-made-fast/
+        select pgc.reltuples::bigint as count
+        from pg_catalog.pg_class pgc
+        where pgc.relname = 'edc_transfer_process'
+    ) c
+    where i.transferprocess_id = o.transferprocess_id and c.count > 3000;
+
+    return null;
+end;
+$$ language plpgsql;
+
+create trigger delete_old_rows after insert
+    on edc_transfer_process
+    for each row
+execute function transfer_process_delete_old_rows();
\ No newline at end of file
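
Note on the count(*) workaround in the migration: pg_class.reltuples is a
planner estimate that is only refreshed by VACUUM / ANALYZE, so the trigger's
threshold check trades accuracy for speed. A minimal psql sketch comparing the
two, not part of the patch itself (the timing claim in the comment is the
patch author's, not re-measured here):

    -- Exact count: scans the table (or an index); cost grows with table size
    select count(*) from edc_transfer_process;

    -- Estimate used by the trigger: reads a single pg_class row, but is only
    -- as fresh as the last VACUUM / ANALYZE
    -- (on PostgreSQL 14+ it is -1 for a never-analyzed table)
    select pgc.reltuples::bigint as estimated_count
    from pg_catalog.pg_class pgc
    where pgc.relname = 'edc_transfer_process';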
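
Note on the concurrency caveat in the migration's comments: two concurrent
inserts can fire the trigger at the same time, and both victim subqueries may
select the same oldest rows, so the deletions overlap and the cleanup falls
behind; deleting two rows per insert only compensates for this. A common
PostgreSQL remedy is row locking with SKIP LOCKED; the following is a
hypothetical variant of the victim-selection subquery, sketched here for
illustration and not shipped by this patch:

    -- With SKIP LOCKED, concurrent deleters each pick the oldest rows not
    -- already locked by another transaction, so the victim sets are disjoint
    select i2.transferprocess_id
    from edc_transfer_process i2
    order by i2.created_at
    limit 2
    for update skip locked;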
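
Note: a minimal smoke test for the trigger, assuming a database where the V2
migration above has run. It pretends transferprocess_id and created_at are the
only required columns of edc_transfer_process and that created_at accepts
plain integers; neither holds for a real EDC schema, so treat this as a sketch
only:

    -- Seed past the 3000-row threshold; the trigger fires per row but deletes
    -- nothing yet, because the reltuples estimate is still stale
    insert into edc_transfer_process (transferprocess_id, created_at)
    select 'dummy-' || n, n from generate_series(1, 3100) as n;

    -- Refresh pg_class.reltuples so the threshold check sees > 3000 rows
    analyze edc_transfer_process;

    -- This insert fires the trigger, which now deletes the two oldest rows
    insert into edc_transfer_process (transferprocess_id, created_at)
    values ('dummy-3101', 3101);

    -- Expect 3099 rows: 3100 seeded + 1 inserted - 2 deleted
    select count(*) from edc_transfer_process;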