From ec47f2e9e4a8fd7a2b3ff103b56048af446616b5 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 12 Jun 2024 17:00:27 -0700 Subject: [PATCH 01/81] Update versions for hotfix --- gemma-cli/pom.xml | 2 +- gemma-core/pom.xml | 2 +- gemma-groovy-support/pom.xml | 2 +- gemma-rest/pom.xml | 2 +- gemma-web/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gemma-cli/pom.xml b/gemma-cli/pom.xml index 03eec0e6bc..1389123481 100644 --- a/gemma-cli/pom.xml +++ b/gemma-cli/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.7 + 1.31.8-SNAPSHOT 4.0.0 gemma-cli diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index b3bd365e8a..74b564edd5 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.7 + 1.31.8-SNAPSHOT 4.0.0 gemma-core diff --git a/gemma-groovy-support/pom.xml b/gemma-groovy-support/pom.xml index 24abb9aed4..c34f0864bb 100644 --- a/gemma-groovy-support/pom.xml +++ b/gemma-groovy-support/pom.xml @@ -6,7 +6,7 @@ gemma gemma - 1.31.7 + 1.31.8-SNAPSHOT gemma-groovy-support diff --git a/gemma-rest/pom.xml b/gemma-rest/pom.xml index 35f3b96417..f60f2918b6 100644 --- a/gemma-rest/pom.xml +++ b/gemma-rest/pom.xml @@ -5,7 +5,7 @@ gemma gemma - 1.31.7 + 1.31.8-SNAPSHOT 4.0.0 diff --git a/gemma-web/pom.xml b/gemma-web/pom.xml index dbf9c28f7a..c70cd89e5a 100644 --- a/gemma-web/pom.xml +++ b/gemma-web/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.7 + 1.31.8-SNAPSHOT 4.0.0 gemma-web diff --git a/pom.xml b/pom.xml index aae6b6e848..c2271e3c9f 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ Gemma gemma gemma - 1.31.7 + 1.31.8-SNAPSHOT 2005 The Gemma Project for meta-analysis of genomics data https://gemma.msl.ubc.ca From 9a32566fee718d07a6bf88a576a6c4fa3e6f4fa1 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 12 Jun 2024 17:01:57 -0700 Subject: [PATCH 02/81] Fix removal of experiment with phantom other parts (fix #1009) --- .../experiment/ExpressionExperimentDaoImpl.java | 17 
+++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index 60a123dc32..4d44a68ebe 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -31,7 +31,6 @@ import org.springframework.stereotype.Repository; import org.springframework.util.Assert; import ubic.gemma.core.analysis.expression.diff.BaselineSelection; -import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; import ubic.gemma.core.profiling.StopWatchUtils; import ubic.gemma.model.association.GOEvidenceCode; import ubic.gemma.model.common.Identifiable; @@ -1822,14 +1821,20 @@ public void remove( ExpressionExperiment ee ) { ee.getCurationDetails().setLastNoteUpdateEvent( null ); ee.getCurationDetails().setLastTroubledEvent( null ); - // dissociate this EE from other parts - if ( !ee.getOtherParts().isEmpty() ) { - log.info( String.format( "Detaching split experiment from %d other parts", ee.getOtherParts().size() ) ); - for ( ExpressionExperiment e : ee.getOtherParts() ) { + // dissociate this EE from other parts that refers to it + // it's not reliable to check the otherParts collection because the relation is bi-directional and some dataset + // might refer to this EE and not the other way around + //noinspection unchecked + List otherParts = getSessionFactory().getCurrentSession() + .createQuery( "select distinct ee from ExpressionExperiment ee join ee.otherParts op where op = :ee" ) + .setParameter( "ee", ee ) + .list(); + if ( !otherParts.isEmpty() ) { + log.info( String.format( "Detaching split experiment from %d other parts", otherParts.size() ) ); + for ( 
ExpressionExperiment e : otherParts ) { log.debug( "Detaching from " + e ); e.getOtherParts().remove( ee ); } - ee.getOtherParts().clear(); } // detach from BAs from dimensions, completely detached dimension will be removed later From 09d02e0e19543ac83da341627916acfb49532d9d Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 13 Jun 2024 10:48:46 -0700 Subject: [PATCH 03/81] Indicate how much time table updates are taking --- .../maintenance/TableMaintenanceUtilImpl.java | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java index 8343cb47d4..55a6a53bd0 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java @@ -21,6 +21,7 @@ import io.micrometer.core.annotation.Timed; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.hibernate.SessionFactory; @@ -29,8 +30,9 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.util.Assert; -import ubic.gemma.model.common.auditAndSecurity.Auditable; +import ubic.gemma.core.util.MailEngine; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.eventType.ArrayDesignGeneMappingEvent; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.common.description.ExternalDatabases; @@ -41,7 +43,6 @@ import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; 
import ubic.gemma.persistence.service.common.description.ExternalDatabaseService; import ubic.gemma.persistence.service.genome.GeneDao; -import ubic.gemma.core.util.MailEngine; import javax.annotation.Nullable; import java.io.IOException; @@ -236,6 +237,7 @@ public void updateGene2CsEntries() { @Timed public int updateExpressionExperiment2CharacteristicEntries( @Nullable Date sinceLastUpdate, boolean truncate ) { Assert.isTrue( !( sinceLastUpdate != null && truncate ), "Cannot perform a partial update with sinceLastUpdate with truncate." ); + StopWatch timer = StopWatch.createStarted(); log.info( String.format( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table%s...", sinceLastUpdate != null ? " since " + sinceLastUpdate : "" ) ); if ( truncate ) { @@ -259,9 +261,10 @@ public int updateExpressionExperiment2CharacteristicEntries( @Nullable Date sinc .setParameter( "edClass", ExperimentalDesign.class ) .setParameter( "since", sinceLastUpdate ) .executeUpdate(); - log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table; %d entries were updated%s.", + log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table; %d entries were updated%s in %d ms.", updated, - sinceLastUpdate != null ? " since " + sinceLastUpdate : "" ) ); + sinceLastUpdate != null ? " since " + sinceLastUpdate : "", + timer.getTime() ) ); return updated; } @@ -284,6 +287,7 @@ public int updateExpressionExperiment2CharacteristicEntries( Class level, @Nu } else { throw new IllegalArgumentException( "Level must be one of ExpressionExperiment.class, BioMaterial.class or ExperimentalDesign.class." ); } + StopWatch timer = StopWatch.createStarted(); log.info( String.format( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at %s level%s...", level.getSimpleName(), sinceLastUpdate != null ? 
" since " + sinceLastUpdate : "" ) ); @@ -303,23 +307,25 @@ public int updateExpressionExperiment2CharacteristicEntries( Class level, @Nu .setParameter( levelParamName, level ) .setParameter( "since", sinceLastUpdate ) .executeUpdate(); - log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at %s level; %d entries were updated%s.", + log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at %s level; %d entries were updated%s in %d ms.", level.getSimpleName(), updated, - sinceLastUpdate != null ? " since " + sinceLastUpdate : "" ) ); + sinceLastUpdate != null ? " since " + sinceLastUpdate : "", + timer.getTime() ) ); return updated; } @Override @Transactional public int updateExpressionExperiment2ArrayDesignEntries( @Nullable Date sinceLastUpdate ) { + StopWatch timer = StopWatch.createStarted(); log.info( String.format( "Updating the EXPRESSION_EXPERIMENT2ARRAY_DESIGN table%s...", sinceLastUpdate != null ? " since " + sinceLastUpdate : "" ) ); int updated = sessionFactory.getCurrentSession().createSQLQuery( EE2AD_QUERY ) .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) .setParameter( "since", sinceLastUpdate ) .executeUpdate(); - log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2ARRAY_DESIGN table; %d entries were updated%s.", - updated, sinceLastUpdate != null ? " since " + sinceLastUpdate : "" ) ); + log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2ARRAY_DESIGN table; %d entries were updated%s in %d ms.", + updated, sinceLastUpdate != null ? " since " + sinceLastUpdate : "", timer.getTime() ) ); return updated; } @@ -338,12 +344,13 @@ public void disableEmail() { * @see GeneDao for where the GENE2CS table is used extensively. */ private void generateGene2CsEntries() { + StopWatch timer = StopWatch.createStarted(); TableMaintenanceUtilImpl.log.info( "Updating the GENE2CS table..." 
); int updated = this.sessionFactory.getCurrentSession() .createSQLQuery( TableMaintenanceUtilImpl.GENE2CS_REPOPULATE_QUERY ) .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) .executeUpdate(); - TableMaintenanceUtilImpl.log.info( String.format( "Done regenerating the GENE2CS table; %d entries were updated.", updated ) ); + TableMaintenanceUtilImpl.log.info( String.format( "Done regenerating the GENE2CS table; %d entries were updated in %d ms.", updated, timer.getTime() ) ); } /** From 744d5abd8055ffcad7481db9bb073c69bbe64d56 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 13 Jun 2024 11:18:45 -0700 Subject: [PATCH 04/81] Reschedule update of EE2C and EE2AD tables after working hours (fix #1148) --- .../gemma/applicationContext-schedule.xml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml index 9ed16c18b7..cb24ce1dec 100644 --- a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml +++ b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml @@ -115,8 +115,8 @@ - - + + @@ -131,9 +131,8 @@ - - - + + @@ -148,9 +147,8 @@ - - - + + @@ -164,9 +162,8 @@ - - - + + From 5e3c749926d3d3850fad6a8ab1629100dc12e2a0 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 13 Jun 2024 11:19:27 -0700 Subject: [PATCH 05/81] Use constants for class names in table update queries --- .../maintenance/TableMaintenanceUtilImpl.java | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java index 55a6a53bd0..ad059de499 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java @@ -95,7 +95,7 @@ public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { private static final String CD_LAST_UPDATED_SINCE = "(CD.LAST_UPDATED is null or :since is null or CD.LAST_UPDATED >= :since)"; private static final String EE2C_EE_QUERY = - "select MIN(C.ID), C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), cast(:eeClass as char(255)) " + "select C.ID, C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), 'ubic.gemma.model.expression.experiment.ExpressionExperiment' " + "from INVESTIGATION I " + "join CURATION_DETAILS CD on I.CURATION_DETAILS_FK = CD.ID " + "join CHARACTERISTIC C on I.ID = C.INVESTIGATION_FK " @@ -104,7 +104,7 @@ public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { + "group by I.ID, COALESCE(C.CATEGORY_URI, C.CATEGORY), COALESCE(C.VALUE_URI, C.`VALUE`)"; private static final String EE2C_BM_QUERY = - "select MIN(C.ID), C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), cast(:bmClass as char(255)) " + "select C.ID, C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), 'ubic.gemma.model.expression.biomaterial.BioMaterial' " + "from INVESTIGATION I " + "join CURATION_DETAILS CD on I.CURATION_DETAILS_FK = CD.ID " + "join BIO_ASSAY BA on I.ID = BA.EXPRESSION_EXPERIMENT_FK " @@ -115,7 +115,7 @@ public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { + "group by I.ID, COALESCE(C.CATEGORY_URI, C.CATEGORY), 
COALESCE(C.VALUE_URI, C.`VALUE`)"; private static final String EE2C_ED_QUERY = - "select MIN(C.ID), C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), cast(:edClass as char(255)) " + "select C.ID, C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), 'ubic.gemma.model.expression.experiment.ExperimentalDesign' " + "from INVESTIGATION I " + "join CURATION_DETAILS CD on I.CURATION_DETAILS_FK = CD.ID " + "join EXPERIMENTAL_DESIGN on I.EXPERIMENTAL_DESIGN_FK = EXPERIMENTAL_DESIGN.ID " @@ -126,7 +126,7 @@ public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { // remove C.class = 'Statement' once the old-style characteristics are removed (see https://github.com/PavlidisLab/Gemma/issues/929 for details) + "and C.class = 'Statement' " + "and " + CD_LAST_UPDATED_SINCE + " " - + "group by I.ID, COALESCE(C.CATEGORY_URI, C.CATEGORY), COALESCE(C.VALUE_URI, C.`VALUE`)"; + + "group by I.ID, COALESCE(C.CATEGORY_URI, C.CATEGORY), COALESCE(C.VALUE_URI, C.`VALUE`) "; private static final String EE2AD_QUERY = "insert into EXPRESSION_EXPERIMENT2ARRAY_DESIGN (EXPRESSION_EXPERIMENT_FK, ARRAY_DESIGN_FK, IS_ORIGINAL_PLATFORM, ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK) " + "select I.ID, AD.ID, FALSE, (" + SELECT_ANONYMOUS_MASK + ") from INVESTIGATION I " @@ -256,9 +256,6 @@ public int updateExpressionExperiment2CharacteristicEntries( @Nullable Date sinc + EE2C_ED_QUERY + " " + "on duplicate key update NAME = VALUES(NAME), DESCRIPTION = VALUES(DESCRIPTION), CATEGORY = VALUES(CATEGORY), CATEGORY_URI = VALUES(CATEGORY_URI), `VALUE` = VALUES(`VALUE`), VALUE_URI = VALUES(VALUE_URI), ORIGINAL_VALUE = VALUES(ORIGINAL_VALUE), EVIDENCE_CODE = VALUES(EVIDENCE_CODE), ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK = VALUES(ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK), LEVEL = VALUES(LEVEL)" ) 
.addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) - .setParameter( "eeClass", ExpressionExperiment.class ) - .setParameter( "bmClass", BioMaterial.class ) - .setParameter( "edClass", ExperimentalDesign.class ) .setParameter( "since", sinceLastUpdate ) .executeUpdate(); log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table; %d entries were updated%s in %d ms.", @@ -273,16 +270,12 @@ public int updateExpressionExperiment2CharacteristicEntries( @Nullable Date sinc @Transactional public int updateExpressionExperiment2CharacteristicEntries( Class level, @Nullable Date sinceLastUpdate, boolean truncate ) { Assert.isTrue( !( sinceLastUpdate != null && truncate ), "Cannot perform a partial update with sinceLastUpdate with truncate." ); - String levelParamName; String query; if ( level.equals( ExpressionExperiment.class ) ) { - levelParamName = "eeClass"; query = EE2C_EE_QUERY; } else if ( level.equals( BioMaterial.class ) ) { - levelParamName = "bmClass"; query = EE2C_BM_QUERY; } else if ( level.equals( ExperimentalDesign.class ) ) { - levelParamName = "edClass"; query = EE2C_ED_QUERY; } else { throw new IllegalArgumentException( "Level must be one of ExpressionExperiment.class, BioMaterial.class or ExperimentalDesign.class." 
); @@ -304,7 +297,6 @@ public int updateExpressionExperiment2CharacteristicEntries( Class level, @Nu + query + " " + "on duplicate key update NAME = VALUES(NAME), DESCRIPTION = VALUES(DESCRIPTION), CATEGORY = VALUES(CATEGORY), CATEGORY_URI = VALUES(CATEGORY_URI), `VALUE` = VALUES(`VALUE`), VALUE_URI = VALUES(VALUE_URI), ORIGINAL_VALUE = VALUES(ORIGINAL_VALUE), EVIDENCE_CODE = VALUES(EVIDENCE_CODE), ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK = VALUES(ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK), LEVEL = VALUES(LEVEL)" ) .addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) - .setParameter( levelParamName, level ) .setParameter( "since", sinceLastUpdate ) .executeUpdate(); log.info( String.format( "Done updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at %s level; %d entries were updated%s in %d ms.", From c15f5d1902184e8076d189c96b886940b6a655fa Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Fri, 14 Jun 2024 13:20:17 -0700 Subject: [PATCH 06/81] may finally address #915 and #651 --- .../gemma___U_clean_install__DskipTests_.xml | 69 +++++++++++++++++++ .../preprocess/VectorMergingServiceImpl.java | 1 + ...essionExperimentPlatformSwitchService.java | 6 +- ...cessedExpressionDataCreateServiceTest.java | 47 +++++++++++-- .../preprocess/VectorMergingServiceTest.java | 8 +-- 5 files changed, 120 insertions(+), 11 deletions(-) create mode 100644 .idea/runConfigurations/gemma___U_clean_install__DskipTests_.xml diff --git a/.idea/runConfigurations/gemma___U_clean_install__DskipTests_.xml b/.idea/runConfigurations/gemma___U_clean_install__DskipTests_.xml new file mode 100644 index 0000000000..39d204d6da --- /dev/null +++ b/.idea/runConfigurations/gemma___U_clean_install__DskipTests_.xml @@ -0,0 +1,69 @@ + + + + + + + + + + \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java index e336a1ed0a..608c6da026 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceImpl.java @@ -231,6 +231,7 @@ public void mergeVectors( ExpressionExperiment ee ) { // replace raw vectors with expressionExperimentService.removeAllRawDataVectors( ee ); ee.getRawExpressionDataVectors().addAll( newVectors ); + ee.getQuantitationTypes().addAll( qt2Vec.keySet() ); // remove processed vectors expressionExperimentService.removeProcessedDataVectors( ee ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/ExpressionExperimentPlatformSwitchService.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/ExpressionExperimentPlatformSwitchService.java index 3ee490a3d9..473edac037 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/ExpressionExperimentPlatformSwitchService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/ExpressionExperimentPlatformSwitchService.java @@ -180,7 +180,7 @@ public void switchExperimentToArrayDesign( ExpressionExperiment ee, ArrayDesign for ( ArrayDesign oldAd : oldArrayDesigns ) { log.info( String.format( "Switching vectors from %s to %s", oldAd.getShortName(), arrayDesign.getShortName() ) ); totalVectorsSwitched += this.switchDataForPlatform( ee, arrayDesign, designElementMap, - targetBioAssayDimension, usedDesignElements, oldAd ); + targetBioAssayDimension /* for case 1, will be null */, usedDesignElements, oldAd ); } if ( totalVectorsSwitched == 0 && hasData ) { @@ -200,7 +200,7 @@ public void switchExperimentToArrayDesign( ExpressionExperiment ee, ArrayDesign ee.setDescription( ee.getDescription() + " " + descriptionUpdate ); } - if ( targetBioAssayDimension != null && !unusedBADs.isEmpty() ) { + if ( targetBioAssayDimension != null && !unusedBADs.isEmpty() ) { // Case 2 log.info( "Cleaning up unused BioAssays from previous platforms..." 
); this.cleanupUnused( ee, unusedBADs, targetBioAssayDimension ); } @@ -210,7 +210,7 @@ public void switchExperimentToArrayDesign( ExpressionExperiment ee, ArrayDesign "Switch to use " + arrayDesign.getShortName() ); log.info( "Completing switching " + ee ); // flush of transaction happens after this, can take a while. - if ( hasData ) { + if ( hasData && targetBioAssayDimension != null /* case 2 */ ) { log.info( ee + " has data, regenerating processed data vectors..." ); processedExpressionDataVectorService.createProcessedDataVectors( ee ); // this still fails sometimes? works fine if run later by cli } diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java index c0d7f586d0..f41ca1aa45 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java @@ -19,17 +19,23 @@ package ubic.gemma.core.analysis.preprocess; import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.After; import org.junit.Test; import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; import ubic.basecode.io.ByteArrayConverter; import ubic.gemma.core.analysis.report.ExpressionExperimentReportService; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; +import ubic.gemma.core.loader.expression.ExpressionExperimentPlatformSwitchService; +import ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignMergeService; import ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest; +import ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator; import ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal; 
import ubic.gemma.core.loader.expression.geo.service.GeoService; import ubic.gemma.core.loader.util.AlreadyExistsInSystemException; import ubic.gemma.core.util.test.category.SlowTest; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssay.BioAssayValueObject; import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; @@ -39,6 +45,7 @@ import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.designElement.CompositeSequenceValueObject; import ubic.gemma.model.expression.experiment.*; +import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.BioAssayDimensionService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.biomaterial.BioMaterialService; @@ -66,6 +73,9 @@ public class ProcessedExpressionDataCreateServiceTest extends AbstractGeoService @Autowired private ExperimentalFactorService experimentalFactorService; + @Autowired + private ArrayDesignService arrayDesignService; + @Autowired private ExpressionExperimentReportService expressionExperimentReportService; @@ -75,25 +85,52 @@ public class ProcessedExpressionDataCreateServiceTest extends AbstractGeoService @Autowired private BioAssayDimensionService bioAssayDimensionService; + @Autowired + private ArrayDesignMergeService arrayDesignMergeService; + + @Autowired + private ExpressionExperimentPlatformSwitchService expressionExperimentPlatformSwitchService; + private ExpressionExperiment ee = null; - @SuppressWarnings("unchecked") + @After + public void tearDown() { + if ( ee != null ) { + try { + Collection arrayDesignsUsed = eeService.getArrayDesignsUsed( ee ); + eeService.remove( ee ); + arrayDesignService.remove( arrayDesignsUsed ); + } catch ( Exception e ) { + log.error( 
"Error during teardown", e ); + } + } + } + @Test @Category(SlowTest.class) public void testComputeDevRankForExpressionExperimentB() throws Exception { try { - geoService.setGeoDomainObjectGenerator( - new GeoDomainObjectGeneratorLocal( this.getTestFileBasePath( "GSE5949short" ) ) ); + GeoDomainObjectGenerator f = new GeoDomainObjectGeneratorLocal( this.getTestFileBasePath( "GSE5949short" ) ); + f.setDoSampleMatching( true ); // enable so platform switch is realistic + geoService.setGeoDomainObjectGenerator( f ); Collection results = ( Collection ) geoService .fetchAndLoad( "GSE5949", false, true, false ); this.ee = results.iterator().next(); } catch ( AlreadyExistsInSystemException e ) { - this.ee = ( ( Collection ) e.getData() ).iterator().next(); + fail( "GSE5949 needs to be deleted prior to test" ); } ee = this.eeService.thawLite( ee ); + // Add test of platform merge-and-switch + Collection designs = eeService.getArrayDesignsUsed( ee ); + ArrayDesign one = designs.iterator().next(); + arrayDesignMergeService.merge( one, designs, "mergedTESTFORGSE5949", "mergedTESTFOR_GSE5949_" + + RandomStringUtils.randomAlphabetic( 5 ), false ); + expressionExperimentPlatformSwitchService.switchExperimentToMergedPlatform( ee ); + ee = this.eeService.thawLite( ee ); // essential. 
+ processedExpressionDataVectorService.computeProcessedExpressionData( ee ); Collection preferredVectors = this.processedExpressionDataVectorService .getProcessedDataVectors( ee ); @@ -108,6 +145,8 @@ public void testComputeDevRankForExpressionExperimentB() throws Exception { } assertNotNull( ee.getNumberOfDataVectors() ); + assertEquals( 500, ( long ) ee.getNumberOfDataVectors() ); + assertEquals(2, ee.getBioAssays().size()); ExpressionExperimentValueObject s = expressionExperimentReportService.generateSummary( ee.getId() ); assertNotNull( s ); assertEquals( ee.getNumberOfDataVectors(), s.getProcessedExpressionVectorCount() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceTest.java index 059d475adc..dc333cf981 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/VectorMergingServiceTest.java @@ -128,7 +128,6 @@ public void tearDown() { @Test @Category(SlowTest.class) - @Ignore("There's a regression that will be fixed in a subsequent patch release (see https://github.com/PavlidisLab/Gemma/issues/651)") final public void test() throws Exception { /* * Need a persistent experiment that uses multiple array designs. Then merge the designs, switch the vectors, @@ -208,7 +207,7 @@ final public void test() throws Exception { ee = eeService.thaw( ee ); - eePlatformSwitchService.switchExperimentToArrayDesign( ee, mergedAA ); + eePlatformSwitchService.switchExperimentToArrayDesign( ee, mergedAA ); // prerequisite for vector merging ee = eeService.thaw( ee ); // check we actually got switched over. 
for ( BioAssay ba : ee.getBioAssays() ) { @@ -218,7 +217,7 @@ final public void test() throws Exception { assertEquals( mergedAA, v.getDesignElement().getArrayDesign() ); } - assertEquals( 16, ee.getQuantitationTypes().size() ); + assertEquals( 15, ee.getQuantitationTypes().size() ); assertEquals( 1828, ee.getRawExpressionDataVectors().size() ); vectorMergingService.mergeVectors( ee ); @@ -228,6 +227,7 @@ final public void test() throws Exception { assertEquals( 46, ee.getNumberOfSamples().intValue() ); assertEquals( 978, ee.getRawExpressionDataVectors().size() ); + assertEquals( 15, ee.getQuantitationTypes().size() ); assertTrue( ee.getProcessedExpressionDataVectors().isEmpty() ); assertEquals( 0, ee.getNumberOfDataVectors().intValue() ); @@ -249,7 +249,7 @@ final public void test() throws Exception { Collection processedDataArrays = processedExpressionDataVectorService .getProcessedDataArrays( ee, 50 ); - assertEquals( 50, processedDataArrays.size() ); + assertEquals( 28, processedDataArrays.size() ); } From f90dc6b8f6b480e9c52d9663f91510b3be8e35c9 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 14 Jun 2024 13:28:40 -0700 Subject: [PATCH 07/81] Clarify the signature of ExpressionExperimentService.addRawVectors() that it expects a single QT --- .../loader/expression/DataUpdaterImpl.java | 2 +- .../ExpressionExperimentService.java | 2 +- .../ExpressionExperimentServiceImpl.java | 18 +++++------------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/DataUpdaterImpl.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/DataUpdaterImpl.java index 9b97743c41..5e01124c2b 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/DataUpdaterImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/DataUpdaterImpl.java @@ -590,7 +590,7 @@ public void addData( ExpressionExperiment ee, ArrayDesign targetPlatform, Expres throw new 
IllegalStateException( "no vectors!" ); } - experimentService.addRawVectors( ee, vectors ); + experimentService.addRawDataVectors( ee, qt, vectors ); this.audit( ee, "Data vectors added for " + targetPlatform + ", " + qt, false ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index d3e329ea23..18b3284efa 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -99,7 +99,7 @@ public interface ExpressionExperimentService * @return the number of added vectors */ @Secured({ "GROUP_USER", "ACL_SECURABLE_EDIT" }) - int addRawVectors( ExpressionExperiment eeToUpdate, Collection newVectors ); + int addRawDataVectors( ExpressionExperiment eeToUpdate, QuantitationType quantitationType, Collection newVectors ); /** * @see ExpressionExperimentDao#replaceRawDataVectors(ExpressionExperiment, QuantitationType, Collection) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 4a09caa46f..760ad3e2e3 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -229,24 +229,18 @@ public void addFactorValues( ExpressionExperiment ee, Map newVectors ) { Collection BADs = new HashSet<>(); - Collection qts = new HashSet<>(); for ( RawExpressionDataVector vec : newVectors ) { BADs.add( vec.getBioAssayDimension() ); - qts.add( 
vec.getQuantitationType() ); } if ( BADs.size() > 1 ) { throw new IllegalArgumentException( "Vectors must share a common bioassay dimension" ); } - if ( qts.size() > 1 ) { - throw new UnsupportedOperationException( - "Can only replace with one type of vector (only one quantitation type)" ); - } - BioAssayDimension bad = BADs.iterator().next(); if ( bad.getId() == null ) { log.info( "Creating " + bad + "..." ); @@ -261,9 +255,7 @@ public int addRawVectors( ExpressionExperiment ee, ba.setArrayDesignUsed( vectorAd ); } - QuantitationType qt = newVectors.iterator().next().getQuantitationType(); - - return expressionExperimentDao.addRawDataVectors( ee, qt, newVectors ); + return expressionExperimentDao.addRawDataVectors( ee, quantitationType, newVectors ); } @Override @@ -298,8 +290,8 @@ public int replaceAllRawDataVectors( ExpressionExperiment ee, .collect( Collectors.groupingBy( RawExpressionDataVector::getQuantitationType, Collectors.toSet() ) ); int added = 0; - for ( Collection vectors : BADs.values() ) { - added += this.addRawVectors( ee, vectors ); + for ( Map.Entry> e : BADs.entrySet() ) { + added += this.addRawDataVectors( ee, e.getKey(), e.getValue() ); } return added; From 27f381a9170f55aefffaa7b04d69848f3d689a61 Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Fri, 14 Jun 2024 16:49:01 -0700 Subject: [PATCH 08/81] this extra cleanup is annoying to get working, I'll just rely on us having a clean db. 
--- .../ProcessedExpressionDataCreateServiceTest.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java index f41ca1aa45..cbd57232bc 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ProcessedExpressionDataCreateServiceTest.java @@ -97,9 +97,13 @@ public class ProcessedExpressionDataCreateServiceTest extends AbstractGeoService public void tearDown() { if ( ee != null ) { try { - Collection arrayDesignsUsed = eeService.getArrayDesignsUsed( ee ); + // Collection arrayDesignsUsed = eeService.getArrayDesignsUsed( ee ); eeService.remove( ee ); - arrayDesignService.remove( arrayDesignsUsed ); +// for ( ArrayDesign arrayDesign : arrayDesignsUsed ) { +// arrayDesign = arrayDesignService.thawLite( arrayDesign ); +// arrayDesignService.remove( arrayDesign.getMergees() ); +// } +// arrayDesignService.remove( arrayDesignsUsed ); } catch ( Exception e ) { log.error( "Error during teardown", e ); } @@ -146,7 +150,7 @@ public void testComputeDevRankForExpressionExperimentB() throws Exception { assertNotNull( ee.getNumberOfDataVectors() ); assertEquals( 500, ( long ) ee.getNumberOfDataVectors() ); - assertEquals(2, ee.getBioAssays().size()); + assertEquals( 2, ee.getBioAssays().size() ); ExpressionExperimentValueObject s = expressionExperimentReportService.generateSummary( ee.getId() ); assertNotNull( s ); assertEquals( ee.getNumberOfDataVectors(), s.getProcessedExpressionVectorCount() ); From 580747680af3eeab543f51f0f619250cf8e62518 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 11:19:44 -0700 Subject: [PATCH 09/81] Don't update a set after removing an experiment from it 
(fix #1153) --- .../ExpressionExperimentServiceImpl.java | 1 - ...ssionExperimentServiceIntegrationTest.java | 41 ++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 760ad3e2e3..c34c113a8a 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -1595,7 +1595,6 @@ public void remove( ExpressionExperiment ee ) { } else { AbstractService.log.info( "Removing " + ee + " from " + eeSet ); eeSet.getExperiments().remove( ee ); - this.expressionExperimentSetService.update( eeSet ); // update set to not reference this experiment. } } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceIntegrationTest.java index 269ef9be60..ff4206c331 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceIntegrationTest.java @@ -25,6 +25,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.AccessDeniedException; import ubic.gemma.core.util.test.BaseSpringContextTest; +import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.common.auditAndSecurity.AuditAction; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import 
ubic.gemma.model.common.auditAndSecurity.Contact; @@ -37,10 +38,10 @@ import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.persistence.service.maintenance.TableMaintenanceUtil; +import ubic.gemma.persistence.service.blacklist.BlacklistedEntityService; import ubic.gemma.persistence.service.common.description.CharacteristicService; import ubic.gemma.persistence.service.expression.bioAssayData.RawExpressionDataVectorService; -import ubic.gemma.persistence.service.blacklist.BlacklistedEntityService; +import ubic.gemma.persistence.service.maintenance.TableMaintenanceUtil; import ubic.gemma.persistence.util.Filter; import ubic.gemma.persistence.util.Filters; import ubic.gemma.persistence.util.Slice; @@ -72,6 +73,8 @@ public class ExpressionExperimentServiceIntegrationTest extends BaseSpringContex private TableMaintenanceUtil tableMaintenanceUtil; @Autowired private CharacteristicService characteristicService; + @Autowired + private ExpressionExperimentSetService expressionExperimentSetService; /** * A collection of {@link ExpressionExperiment} that will be removed at the end of the test. 
@@ -485,6 +488,40 @@ public void testUpdateWithTransientEntity() { .hasMessageContaining( "ID is required to be non-null" ); } + @Test + public void testRemoveExperimentInSet() { + ExpressionExperiment ee1 = createExpressionExperiment(); + ExpressionExperiment ee2 = createExpressionExperiment(); + ExpressionExperimentSet eeSet = new ExpressionExperimentSet(); + eeSet.setName( "test" ); + eeSet.setTaxon( ee1.getTaxon() ); + eeSet.getExperiments().add( ee1 ); + eeSet.getExperiments().add( ee2 ); + eeSet = expressionExperimentSetService.create( eeSet ); + expressionExperimentService.remove( ee1 ); + ees.remove( ee1 ); // prevent removal in teardown + eeSet = expressionExperimentSetService.load( eeSet.getId() ); + assertNotNull( eeSet ); + eeSet = expressionExperimentSetService.thaw( eeSet ); + assertNotNull( eeSet ); + assertThat( eeSet.getExperiments() ) + .containsExactly( ee2 ); + } + + @Test + public void testRemoveExperimentInSingletonSet() { + ExpressionExperiment ee1 = createExpressionExperiment(); + ExpressionExperimentSet eeSet = new ExpressionExperimentSet(); + eeSet.setName( "test" ); + eeSet.setTaxon( ee1.getTaxon() ); + eeSet.getExperiments().add( ee1 ); + eeSet = expressionExperimentSetService.create( eeSet ); + expressionExperimentService.remove( ee1 ); + ees.remove( ee1 ); // prevent removal in teardown + eeSet = expressionExperimentSetService.load( eeSet.getId() ); + assertNull( eeSet ); + } + private ExpressionExperiment createExpressionExperiment() { ExpressionExperiment ee = this.getTestPersistentCompleteExpressionExperiment( false ); ees.add( ee ); From ad62c144d1e6327344b5e16c052611d9ebf8e465 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 11:20:15 -0700 Subject: [PATCH 10/81] Cleanup ExpressionExperimentSetService --- .../ExpressionExperimentSetService.java | 41 ---- .../ExpressionExperimentSetServiceImpl.java | 226 +----------------- ...ressionExperimentSetValueObjectHelper.java | 44 +++- 
...ionExperimentSetValueObjectHelperImpl.java | 198 +++++++++++++-- .../ExpressionExperimentSetServiceTest.java | 15 +- ...ionExperimentSetValueObjectHelperTest.java | 9 +- .../ExpressionExperimentSetController.java | 21 +- 7 files changed, 242 insertions(+), 312 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetService.java index 25a95541a0..78e29ece54 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetService.java @@ -39,8 +39,6 @@ public interface ExpressionExperimentSetService extends BaseService, BaseVoEnabledService { - String AUTOMATICALLY_GENERATED_EXPERIMENT_GROUP_DESCRIPTION = "Automatically generated for %s EEs"; - @Override @Secured({ "GROUP_USER" }) ExpressionExperimentSet create( ExpressionExperimentSet expressionExperimentSet ); @@ -65,16 +63,6 @@ public interface ExpressionExperimentSetService @Secured({ "GROUP_USER", "ACL_SECURABLE_EDIT" }) void update( ExpressionExperimentSet expressionExperimentSet ); - @Secured({ "GROUP_USER" }) - ExpressionExperimentSet createFromValueObject( ExpressionExperimentSetValueObject eesvo ); - - /** - * Security is handled within method, when the set is loaded - * - * @param eesvo ee value object - */ - void deleteDatabaseEntity( ExpressionExperimentSetValueObject eesvo ); - @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_COLLECTION_READ" }) Collection find( BioAssaySet bioAssaySet ); @@ -162,35 +150,6 @@ ExpressionExperimentSet initAutomaticallyGeneratedExperimentSet( @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_VALUE_OBJECT_READ" }) ExpressionExperimentSetValueObject loadValueObjectById( Long id, boolean loadEEIds ); - /** - * 
Update corresponding entity based on value object - * - * @param eesvo ee value object - */ - void updateDatabaseEntity( ExpressionExperimentSetValueObject eesvo ); - - /** - * Updates the database record for the param experiment set value object (permission permitting) with the members - * specified of the set, not the name or description etc. - * - * @param eeIds ee ids - * @param groupId group id - */ - void updateDatabaseEntityMembers( Long groupId, Collection eeIds ); - - /** - * Updates the database record for the param experiment set value object (permission permitting) with the value - * object's name and description. - * - * @param loadEEIds whether the returned value object should have the ExpressionExperimentIds collection populated. - * This might be a useful information, but loading the IDs takes slightly longer, so for larger amount of - * EESets this might want to be avoided. - * @param eeSetVO ee set value object - * @return ee vo - */ - ExpressionExperimentSetValueObject updateDatabaseEntityNameDesc( ExpressionExperimentSetValueObject eeSetVO, - boolean loadEEIds ); - @Override @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_VALUE_OBJECT_READ" }) ExpressionExperimentSetValueObject loadValueObjectById( Long id ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceImpl.java index 4778e646e0..e384d5e98b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceImpl.java @@ -18,7 +18,6 @@ */ package ubic.gemma.persistence.service.expression.experiment; -import gemma.gsec.SecurityService; import org.apache.commons.lang3.StringUtils; import 
org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -31,11 +30,9 @@ import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.AbstractVoEnabledService; import ubic.gemma.persistence.service.analysis.expression.ExpressionExperimentSetDao; -import ubic.gemma.persistence.service.genome.taxon.TaxonService; import java.util.ArrayList; import java.util.Collection; -import java.util.HashSet; import java.util.List; /** @@ -49,118 +46,18 @@ public class ExpressionExperimentSetServiceImpl extends AbstractVoEnabledService implements ExpressionExperimentSetService { + private static final String AUTOMATICALLY_GENERATED_EXPERIMENT_GROUP_DESCRIPTION = "Automatically generated for %s EEs"; + private final ExpressionExperimentSetDao expressionExperimentSetDao; - private SecurityService securityService; - private ExpressionExperimentService expressionExperimentService; - private TaxonService taxonService; - private ExpressionExperimentSetValueObjectHelper expressionExperimentValueObjectHelper; + private final ExpressionExperimentService expressionExperimentService; @Autowired - public ExpressionExperimentSetServiceImpl( ExpressionExperimentSetDao expressionExperimentSetDao ) { + public ExpressionExperimentSetServiceImpl( ExpressionExperimentSetDao expressionExperimentSetDao, ExpressionExperimentService expressionExperimentService ) { super( expressionExperimentSetDao ); this.expressionExperimentSetDao = expressionExperimentSetDao; - } - - @Autowired - public void setSecurityService( SecurityService securityService ) { - this.securityService = securityService; - } - - @Autowired - public void setExpressionExperimentService( ExpressionExperimentService expressionExperimentService ) { this.expressionExperimentService = expressionExperimentService; } - @Autowired - public void setTaxonService( TaxonService taxonService ) { - this.taxonService = taxonService; - } - - @Autowired - public void 
setExpressionExperimentValueObjectHelper( - ExpressionExperimentSetValueObjectHelper expressionExperimentValueObjectHelper ) { - this.expressionExperimentValueObjectHelper = expressionExperimentValueObjectHelper; - } - - @Override - @Transactional - public ExpressionExperimentSet createFromValueObject( ExpressionExperimentSetValueObject eesvo ) { - - /* - * Sanity check. - */ - Collection dups = this.findByName( eesvo.getName() ); - if ( dups == null || !dups.isEmpty() ) { - throw new IllegalArgumentException( - "Sorry, there is already a set with that name (" + eesvo.getName() + ")" ); - } - - ExpressionExperimentSet newSet = ExpressionExperimentSet.Factory.newInstance(); - newSet.setName( eesvo.getName() ); - newSet.setDescription( eesvo.getDescription() ); - - Collection datasetsAnalyzed = expressionExperimentService.load( - eesvo.getExpressionExperimentIds() ); - - newSet.getExperiments().addAll( datasetsAnalyzed ); - - if ( eesvo.getTaxonId() != null ) - newSet.setTaxon( taxonService.load( eesvo.getTaxonId() ) ); - else { - /* - * Figure out the taxon from the experiments. mustn't be heterogeneous. - */ - Taxon taxon = null; - for ( BioAssaySet bioAssaySet : newSet.getExperiments() ) { - Taxon eeTaxon = this.getTaxonForSet( bioAssaySet ); - /* - * this can be null. 
- */ - - if ( taxon == null ) { - taxon = eeTaxon; - } else { - assert eeTaxon != null; - if ( !eeTaxon.equals( taxon ) ) { - throw new UnsupportedOperationException( "EESets with mixed taxa are not supported" ); - } - } - } - - if ( taxon == null ) { - throw new IllegalStateException( "Could not determine taxon for new EEset" ); - } - newSet.setTaxon( taxon ); - - } - - if ( newSet.getTaxon() == null ) { - throw new IllegalArgumentException( "Unable to determine the taxon for the EESet" ); - } - - ExpressionExperimentSet newEESet = this.create( newSet ); - - // make groups private by default - if ( eesvo.getIsPublic() ) { - securityService.makePublic( newEESet ); - } else { - securityService.makePrivate( newEESet ); - } - - return newEESet; - - } - - @Override - @Transactional - public void deleteDatabaseEntity( ExpressionExperimentSetValueObject eesvo ) { - try { - this.remove( this.loadOrFail( eesvo.getId() ) ); - } catch ( Exception e ) { - throw new RuntimeException( e ); - } - } - @Override @Transactional(readOnly = true) public Collection find( BioAssaySet bioAssaySet ) { @@ -208,10 +105,8 @@ public ExpressionExperimentSet initAutomaticallyGeneratedExperimentSet( ExpressionExperimentSet eeSet; eeSet = ExpressionExperimentSet.Factory.newInstance(); eeSet.setTaxon( taxon ); - eeSet.setName( this.getMasterSetName( taxon ) ); - eeSet.setDescription( - String.format( ExpressionExperimentSetService.AUTOMATICALLY_GENERATED_EXPERIMENT_GROUP_DESCRIPTION, - String.valueOf( expressionExperiments.size() ) ) ); + eeSet.setName( "Master set for " + taxon.getCommonName() ); + eeSet.setDescription( String.format( AUTOMATICALLY_GENERATED_EXPERIMENT_GROUP_DESCRIPTION, expressionExperiments.size() ) ); eeSet.getExperiments().addAll( expressionExperiments ); return eeSet; } @@ -226,8 +121,7 @@ public ExpressionExperimentSet initAutomaticallyGeneratedExperimentSet( @Override @Transactional(readOnly = true) public boolean isAutomaticallyGenerated( String experimentSetDescription 
) { - String regexDesc = String.format( - ExpressionExperimentSetService.AUTOMATICALLY_GENERATED_EXPERIMENT_GROUP_DESCRIPTION, ".*" ); + String regexDesc = String.format( AUTOMATICALLY_GENERATED_EXPERIMENT_GROUP_DESCRIPTION, ".*" ); return experimentSetDescription.matches( regexDesc ); } @@ -255,95 +149,6 @@ public ExpressionExperimentSetValueObject loadValueObjectById( Long id, boolean return this.expressionExperimentSetDao.loadValueObject( id, loadEEIds ); } - @Override - @Transactional - public void updateDatabaseEntity( ExpressionExperimentSetValueObject eesvo ) { - try { - ExpressionExperimentSet eeset = expressionExperimentValueObjectHelper.convertToEntity( eesvo ); - if ( eeset == null ) { - throw new IllegalArgumentException( "Cannot update null set" ); - } - this.update( eeset ); - } catch ( Exception e ) { - throw new RuntimeException( e ); - } - } - - /** - * update the members of the experiment set with the given ids - * - * @param groupId set to update - * @param eeIds new set member ids - */ - @Override - @Transactional - public void updateDatabaseEntityMembers( Long groupId, Collection eeIds ) { - - if ( eeIds.isEmpty() ) { - throw new IllegalArgumentException( "No expression experiment ids provided. Cannot save an empty set." ); - - } - ExpressionExperimentSet eeSet = this.load( groupId ); - - if ( eeSet == null ) { - throw new IllegalArgumentException( "No experiment set with id=" + groupId + " could be loaded. " - + "Either it does not exist or you do not have permission to view it." 
); - } - - // check that new member ids are valid - Collection newExperiments = expressionExperimentService.load( eeIds ); - - if ( newExperiments.isEmpty() ) { - throw new IllegalArgumentException( - "None of the experiment ids were valid (out of " + eeIds.size() + " provided)" ); - } - if ( newExperiments.size() < eeIds.size() ) { - throw new IllegalArgumentException( - "Some of the experiment ids were invalid: only found " + newExperiments.size() + " out of " - + eeIds.size() + " provided)" ); - } - - assert newExperiments.size() == eeIds.size(); - Collection basColl = new HashSet<>(); - for ( ExpressionExperiment experiment : newExperiments ) { - Taxon eeTaxon = this.getTaxonForSet( experiment ); - - // make sure experiments being added are from the right taxon - if ( eeTaxon == null || !eeTaxon.equals( eeSet.getTaxon() ) ) { - throw new IllegalArgumentException( - experiment + " is of the wrong taxon to add to eeset. EESet taxon is " + eeSet.getTaxon() ); - } - - basColl.add( experiment ); - - } - - eeSet.getExperiments().clear(); - eeSet.getExperiments().addAll( basColl ); - - this.update( eeSet ); - } - - @Override - @Transactional - public ExpressionExperimentSetValueObject updateDatabaseEntityNameDesc( ExpressionExperimentSetValueObject eeSetVO, - boolean loadEEIds ) { - - Long groupId = eeSetVO.getId(); - ExpressionExperimentSet eeSet = this.load( groupId ); - if ( eeSet == null ) { - throw new IllegalArgumentException( "No experiment set with id=" + groupId + " could be loaded" ); - } - - eeSet.setDescription( eeSetVO.getDescription() ); - if ( eeSetVO.getName() != null && eeSetVO.getName().length() > 0 ) - eeSet.setName( eeSetVO.getName() ); - this.update( eeSet ); - - return this.loadValueObjectById( eeSet.getId(), loadEEIds ); - - } - @Override @Transactional(readOnly = true) public ExpressionExperimentSetValueObject loadValueObjectById( Long id ) { @@ -386,7 +191,7 @@ public void update( final ExpressionExperimentSet expressionExperimentSet ) { Taxon 
groupTaxon = expressionExperimentSet.getTaxon(); Taxon eeTaxon; for ( BioAssaySet ee : expressionExperimentSet.getExperiments() ) { - eeTaxon = this.getTaxonForSet( ee ); + eeTaxon = expressionExperimentService.getTaxon( ee ); if ( eeTaxon == null ) { // this can happen if there are 0 samples @@ -412,19 +217,4 @@ public void update( final ExpressionExperimentSet expressionExperimentSet ) { public void update( Collection entities ) { entities.forEach( this::update ); } - - private String getMasterSetName( Taxon taxon ) { - return "Master set for " + taxon.getCommonName(); - } - - private Taxon getTaxonForSet( BioAssaySet experiment ) { - Taxon eeTaxon = expressionExperimentService.getTaxon( experiment ); - - if ( eeTaxon == null ) { - // can happen if the experiment has no samples. - return null; - } - - return eeTaxon; - } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java index 5f7b51f7fe..fa50ed2c3e 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java @@ -1,13 +1,13 @@ /* * The Gemma project - * + * * Copyright (c) 2012 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the * specific language governing permissions and limitations under the License. @@ -17,18 +17,44 @@ import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.expression.experiment.ExpressionExperimentSetValueObject; +import java.util.Collection; + /** * @author paul */ public interface ExpressionExperimentSetValueObjectHelper { /** - * Tries to load an existing experiment set with the param's id, if no experiment can be loaded, create a new one - * with id = null. Sets all fields of the new entity with values from the valueObject param. + * Create an experiment set from a VO. + *

+ * The set is made public if {@link ExpressionExperimentSetValueObject#getIsPublic()} is true, otherwise it is made + * private. + */ + ExpressionExperimentSet create( ExpressionExperimentSetValueObject eesvo ); + + /** + * Update corresponding entity based on value object + */ + void update( ExpressionExperimentSetValueObject eesvo ); + + /** + * Updates the database record for the param experiment set value object (permission permitting) with the value + * object's name and description. * - * @param setVO if null, returns null - * @return ee set + * @param loadEEIds whether the returned value object should have the {@link ExpressionExperimentSetValueObject#getExpressionExperimentIds()} + * collection populated. This might be useful information, but loading the IDs takes slightly longer, + * so for larger amount of EE sets this might want to be avoided. + */ + ExpressionExperimentSetValueObject updateNameAndDescription( ExpressionExperimentSetValueObject eeSetVO, boolean loadEEIds ); + + /** + * Updates the database record for the param experiment set value object (permission permitting) with the members + * specified of the set, not the name or description etc. */ - ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObject setVO ); + void updateMembers( Long groupId, Collection eeIds ); + /** + * Delete the experiment set corresponding to the given VO. 
+ */ + void delete( ExpressionExperimentSetValueObject eesvo ); } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java index ebf86a9153..b669f7a9be 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java @@ -1,26 +1,26 @@ /* * The Gemma project - * + * * Copyright (c) 2009 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. - * + * */ /* * The Gemma project - * + * * Copyright (c) 2012 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -37,9 +37,12 @@ package ubic.gemma.persistence.service.expression.experiment; +import gemma.gsec.SecurityService; import lombok.extern.apachecommons.CommonsLog; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.Assert; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -62,22 +65,180 @@ @CommonsLog public class ExpressionExperimentSetValueObjectHelperImpl implements ExpressionExperimentSetValueObjectHelper { - @Autowired - private ExpressionExperimentSetService expressionExperimentSetService; + private final ExpressionExperimentSetService expressionExperimentSetService; + private final ExpressionExperimentService expressionExperimentService; + private final TaxonService taxonService; + private final SecurityService securityService; @Autowired - private ExpressionExperimentService expressionExperimentService; + public ExpressionExperimentSetValueObjectHelperImpl( ExpressionExperimentSetService expressionExperimentSetService, ExpressionExperimentService expressionExperimentService, TaxonService taxonService, SecurityService securityService ) { + this.expressionExperimentSetService = expressionExperimentSetService; + this.expressionExperimentService = expressionExperimentService; + this.taxonService = taxonService; + this.securityService = securityService; + } - @Autowired - private TaxonService taxonService; - /* - * @see - * ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetValueObjectHelper#convertToLightValueObject(ubic.gemma - * .model.analysis.expression.ExpressionExperimentSet) - */ @Override - public ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObject setVO ) { + 
@Transactional + public ExpressionExperimentSet create( ExpressionExperimentSetValueObject eesvo ) { + + /* + * Sanity check. + */ + Collection dups = expressionExperimentSetService.findByName( eesvo.getName() ); + if ( dups == null || !dups.isEmpty() ) { + throw new IllegalArgumentException( + "Sorry, there is already a set with that name (" + eesvo.getName() + ")" ); + } + + ExpressionExperimentSet newSet = ExpressionExperimentSet.Factory.newInstance(); + newSet.setName( eesvo.getName() ); + newSet.setDescription( eesvo.getDescription() ); + + Collection datasetsAnalyzed = expressionExperimentService.load( + eesvo.getExpressionExperimentIds() ); + + newSet.getExperiments().addAll( datasetsAnalyzed ); + + if ( eesvo.getTaxonId() != null ) + newSet.setTaxon( taxonService.load( eesvo.getTaxonId() ) ); + else { + /* + * Figure out the taxon from the experiments. mustn't be heterogeneous. + */ + Taxon taxon = null; + for ( BioAssaySet bioAssaySet : newSet.getExperiments() ) { + Taxon eeTaxon = expressionExperimentService.getTaxon( bioAssaySet ); + /* + * this can be null. 
+ */ + + if ( taxon == null ) { + taxon = eeTaxon; + } else { + assert eeTaxon != null; + if ( !eeTaxon.equals( taxon ) ) { + throw new UnsupportedOperationException( "EESets with mixed taxa are not supported" ); + } + } + } + + if ( taxon == null ) { + throw new IllegalStateException( "Could not determine taxon for new EEset" ); + } + newSet.setTaxon( taxon ); + + } + + if ( newSet.getTaxon() == null ) { + throw new IllegalArgumentException( "Unable to determine the taxon for the EESet" ); + } + + ExpressionExperimentSet newEESet = expressionExperimentSetService.create( newSet ); + + // make groups private by default + if ( eesvo.getIsPublic() ) { + securityService.makePublic( newEESet ); + } else { + securityService.makePrivate( newEESet ); + } + + return newEESet; + + } + + @Override + @Transactional + public void update( ExpressionExperimentSetValueObject eesvo ) { + Assert.notNull( eesvo, "Cannot update null set" ); + Assert.notNull( eesvo.getId(), "Experiment set VO must have a non-null ID." 
); + ExpressionExperimentSet eeset = convertToEntity( eesvo ); + expressionExperimentSetService.update( eeset ); + } + + @Override + @Transactional + public ExpressionExperimentSetValueObject updateNameAndDescription( ExpressionExperimentSetValueObject eeSetVO, + boolean loadEEIds ) { + + Long groupId = eeSetVO.getId(); + ExpressionExperimentSet eeSet = expressionExperimentSetService.load( groupId ); + if ( eeSet == null ) { + throw new IllegalArgumentException( "No experiment set with id=" + groupId + " could be loaded" ); + } + + eeSet.setDescription( eeSetVO.getDescription() ); + if ( eeSetVO.getName() != null && !eeSetVO.getName().isEmpty() ) + eeSet.setName( eeSetVO.getName() ); + expressionExperimentSetService.update( eeSet ); + + return expressionExperimentSetService.loadValueObjectById( eeSet.getId(), loadEEIds ); + } + + @Override + @Transactional + public void updateMembers( Long groupId, Collection eeIds ) { + + if ( eeIds.isEmpty() ) { + throw new IllegalArgumentException( "No expression experiment ids provided. Cannot save an empty set." ); + + } + ExpressionExperimentSet eeSet = expressionExperimentSetService.load( groupId ); + + if ( eeSet == null ) { + throw new IllegalArgumentException( "No experiment set with id=" + groupId + " could be loaded. " + + "Either it does not exist or you do not have permission to view it." 
); + } + + // check that new member ids are valid + Collection newExperiments = expressionExperimentService.load( eeIds ); + + if ( newExperiments.isEmpty() ) { + throw new IllegalArgumentException( + "None of the experiment ids were valid (out of " + eeIds.size() + " provided)" ); + } + if ( newExperiments.size() < eeIds.size() ) { + throw new IllegalArgumentException( + "Some of the experiment ids were invalid: only found " + newExperiments.size() + " out of " + + eeIds.size() + " provided)" ); + } + + assert newExperiments.size() == eeIds.size(); + Collection basColl = new HashSet<>(); + for ( ExpressionExperiment experiment : newExperiments ) { + Taxon eeTaxon = expressionExperimentService.getTaxon( experiment ); + + // make sure experiments being added are from the right taxon + if ( eeTaxon == null || !eeTaxon.equals( eeSet.getTaxon() ) ) { + throw new IllegalArgumentException( + experiment + " is of the wrong taxon to add to eeset. EESet taxon is " + eeSet.getTaxon() ); + } + + basColl.add( experiment ); + + } + + eeSet.getExperiments().clear(); + eeSet.getExperiments().addAll( basColl ); + + expressionExperimentSetService.update( eeSet ); + } + + @Override + @Transactional + public void delete( ExpressionExperimentSetValueObject eesvo ) { + expressionExperimentSetService.remove( expressionExperimentSetService.loadOrFail( eesvo.getId() ) ); + } + + /** + * Tries to load an existing experiment set with the param's id, if no experiment can be loaded, create a new one + * with id = null. Sets all fields of the new entity with values from the valueObject param. 
+ * + * @param setVO if null, returns null + * @return ee set + */ + ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObject setVO ) { if ( setVO == null ) { return null; } @@ -113,5 +274,4 @@ public ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObje return entity; } - } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java index 0d7f84c91f..dea2bcb921 100755 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetServiceTest.java @@ -23,17 +23,17 @@ import org.junit.Before; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; - import ubic.gemma.core.util.test.BaseSpringContextTest; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentSetValueObject; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; -import java.util.*; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.Set; import static org.junit.Assert.*; @@ -50,6 +50,9 @@ public class ExpressionExperimentSetServiceTest extends BaseSpringContextTest { @Autowired private ExpressionExperimentSetService expressionExperimentSetService; + @Autowired + private ExpressionExperimentSetValueObjectHelper expressionExperimentSetValueObjectHelper; + private 
ExpressionExperiment ee1 = null; private ExpressionExperiment ee2 = null; private ExpressionExperiment eeMouse = null; @@ -141,8 +144,7 @@ public void testAddingExperimentOfWrongTaxonUpdateDatabaseEntityMembers() { Collection newMemberIds = new LinkedList<>(); newMemberIds.add( ee1.getId() ); newMemberIds.add( eeMouse.getId() ); - - expressionExperimentSetService.updateDatabaseEntityMembers( eeSet.getId(), newMemberIds ); + expressionExperimentSetValueObjectHelper.updateMembers( eeSet.getId(), newMemberIds ); } // @@ -162,6 +164,5 @@ public void testAddingExperimentOfWrongTaxonUpdateDatabaseEntityMembers() { public void testIsAutomaticallyGenerated() { assertTrue( expressionExperimentSetService.isAutomaticallyGenerated( eeSetAutoGen.getDescription() ) ); assertFalse( expressionExperimentSetService.isAutomaticallyGenerated( eeSet.getDescription() ) ); - } } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java index dd92ea645c..ccef4355b3 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java @@ -23,16 +23,13 @@ import org.junit.Before; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; +import ubic.gemma.core.util.test.BaseSpringContextTest; +import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; import ubic.gemma.model.expression.experiment.ExpressionExperimentSetValueObject; 
-import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetValueObjectHelper; -import ubic.gemma.core.util.test.BaseSpringContextTest; -import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; import ubic.gemma.persistence.util.EntityUtils; import java.util.Collection; @@ -56,7 +53,7 @@ public class ExpressionExperimentSetValueObjectHelperTest extends BaseSpringCont private ExpressionExperimentSetService expressionExperimentSetService; @Autowired - private ExpressionExperimentSetValueObjectHelper expressionExperimentSetValueObjectHelper; + private ExpressionExperimentSetValueObjectHelperImpl expressionExperimentSetValueObjectHelper; private ExpressionExperiment ee = null; private ExpressionExperimentSet eeSet = null; diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentSetController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentSetController.java index 1c8384f0d6..3e8dc6d740 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentSetController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentSetController.java @@ -31,15 +31,14 @@ import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; import ubic.gemma.model.expression.experiment.ExpressionExperimentSetValueObject; -import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.expression.experiment.SessionBoundExpressionExperimentSetValueObject; import 
ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetValueObjectHelper; import ubic.gemma.persistence.util.EntityUtils; import ubic.gemma.web.controller.BaseController; import ubic.gemma.web.persistence.SessionListManager; import ubic.gemma.web.util.EntityNotFoundException; -import javax.annotation.Nullable; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.util.ArrayList; @@ -59,6 +58,9 @@ public class ExpressionExperimentSetController extends BaseController { @Autowired private ExpressionExperimentSetService expressionExperimentSetService; + @Autowired + private ExpressionExperimentSetValueObjectHelper expressionExperimentSetValueObjectHelper; + @Autowired private SessionListManager sessionListManager; @@ -385,8 +387,7 @@ public Collection update( */ @SuppressWarnings("unused") // Used in front end public String updateMembers( Long groupId, Collection eeIds ) { - - expressionExperimentSetService.updateDatabaseEntityMembers( groupId, eeIds ); + expressionExperimentSetValueObjectHelper.updateMembers( groupId, eeIds ); return null; //FIXME the called method never set the string property. 
} @@ -399,9 +400,7 @@ public String updateMembers( Long groupId, Collection eeIds ) { * @return a value object for the updated set */ public ExpressionExperimentSetValueObject updateNameDesc( ExpressionExperimentSetValueObject eeSetVO ) { - - return expressionExperimentSetService.updateDatabaseEntityNameDesc( eeSetVO, false ); - + return expressionExperimentSetValueObjectHelper.updateNameAndDescription( eeSetVO, false ); } /** @@ -453,7 +452,7 @@ private ExpressionExperimentSet create( ExpressionExperimentSetValueObject obj ) throw new IllegalArgumentException( "You must provide a name" ); } - return expressionExperimentSetService.createFromValueObject( obj ); + return expressionExperimentSetValueObjectHelper.create( obj ); } /** @@ -488,7 +487,7 @@ private ExpressionExperimentSetValueObject getExpressionExperimentSetFromRequest */ private void remove( ExpressionExperimentSetValueObject obj ) { try { - expressionExperimentSetService.deleteDatabaseEntity( obj ); + expressionExperimentSetValueObjectHelper.delete( obj ); } catch ( Exception e ) { throw new RuntimeException( e ); } @@ -496,11 +495,9 @@ private void remove( ExpressionExperimentSetValueObject obj ) { private void update( ExpressionExperimentSetValueObject obj ) { try { - expressionExperimentSetService.updateDatabaseEntity( obj ); + expressionExperimentSetValueObjectHelper.update( obj ); } catch ( Exception e ) { throw new RuntimeException( e ); } - } - } From 3f615adcbd3ad7bd8676ea024ee2981b1607d4c2 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 10:38:10 -0700 Subject: [PATCH 11/81] Remove results and contrasts in bulk --- ...DifferentialExpressionAnalysisDaoImpl.java | 37 +++++++------------ 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java index 50c2431233..9c8463ce65 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java @@ -40,11 +40,11 @@ import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; +import ubic.gemma.persistence.hibernate.HibernateUtils; import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisDaoBase; import ubic.gemma.persistence.util.CommonQueries; import ubic.gemma.persistence.util.EntityUtils; -import ubic.gemma.persistence.hibernate.HibernateUtils; import java.io.Serializable; import java.sql.PreparedStatement; @@ -664,28 +664,19 @@ public Map> getAnalysesByE @Override public void remove( DifferentialExpressionAnalysis analysis ) { - this.getSessionFactory().getCurrentSession().doWork( work -> { - PreparedStatement deleteContrast = work.prepareStatement( DELETE_CONTRAST_SQL ); - PreparedStatement deleteResult = work.prepareStatement( DELETE_RESULT_SQL ); - int numResults = 0; - int numContrasts = 0; - for ( ExpressionAnalysisResultSet rs : analysis.getResultSets() ) { - for ( DifferentialExpressionAnalysisResult result : rs.getResults() ) { - deleteResult.setLong( 1, result.getId() ); - deleteResult.addBatch(); - numResults++; - for ( ContrastResult cr : result.getContrasts() ) { - deleteContrast.setLong( 1, cr.getId() ); - deleteContrast.addBatch(); - numContrasts++; - } - } - } - statementLogger.logStatement( String.format( "%s [repeated %d times]", DELETE_CONTRAST_SQL, numContrasts ) ); - ensureExpectedRowsAreInserted( deleteContrast, deleteContrast.executeBatch() ); - statementLogger.logStatement( String.format( 
"%s [repeated %d times]", DELETE_RESULT_SQL, numResults ) ); - ensureExpectedRowsAreInserted( deleteResult, deleteResult.executeBatch() ); - } ); + log.info( "Removing " + analysis + "..." ); + List resultSetIds = EntityUtils.getIds( analysis.getResultSets() ); + if ( !resultSetIds.isEmpty() ) { + int removedContrasts = getSessionFactory().getCurrentSession() + .createSQLQuery( "delete cr from CONTRAST_RESULT cr where cr.DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT_FK in (select dear.ID from DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT dear where dear.RESULT_SET_FK in (:resultSetIds))" ) + .setParameterList( "resultSetIds", resultSetIds ) + .executeUpdate(); + int removedResults = getSessionFactory().getCurrentSession() + .createSQLQuery( "delete dear from DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT dear where dear.RESULT_SET_FK in (:resultSetIds)" ) + .setParameterList( "resultSetIds", resultSetIds ) + .executeUpdate(); + log.info( String.format( "Removed %d results and %d contrasts from %s.", removedResults, removedContrasts, analysis ) ); + } super.remove( analysis ); } From b3ce846c857925a137aa2d4c13aea675f63c083d Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 10:48:18 -0700 Subject: [PATCH 12/81] Don't bother thawing EEs before removing it Vectors and analyses are removed in bulk, so there's no point thawing the experiment before removing it. 
--- .../diff/DifferentialExpressionAnalysisServiceImpl.java | 8 ++++++-- .../experiment/ExpressionExperimentServiceImpl.java | 5 ++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java index a964eb42e2..693ccb2810 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java @@ -225,11 +225,15 @@ public Map metas = this.geneDiffExMetaAnalysisDao .findByExperiment( toDelete.getExperimentAnalyzed() ); - geneDiffExMetaAnalysisDao.remove( metas ); + if ( !metas.isEmpty() ) { + log.info( "Removing " + metas.size() + " meta analyses with this experiment..." 
); + geneDiffExMetaAnalysisDao.remove( metas ); + } // Remove the DEA super.remove( toDelete ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index c34c113a8a..1db4c612ee 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -1554,15 +1554,14 @@ public void remove( Long id ) { @Override @Transactional public void remove( ExpressionExperiment ee ) { + ee = ensureInSession( ee ); + if ( !securityService.isEditable( ee ) ) { throw new SecurityException( "Error performing 'ExpressionExperimentService.remove(ExpressionExperiment expressionExperiment)' --> " + " You do not have permission to edit this experiment." 
); } - // thaw everything - ee = thaw( ee ); - // Remove subsets Collection subsets = this.getSubSets( ee ); for ( ExpressionExperimentSubSet subset : subsets ) { From 0423761f7681e1ffb31847369695caf8caf4cec9 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 12:07:38 -0700 Subject: [PATCH 13/81] Implement loading a QT by name and vector type (fix #1152) --- ...UniqueQuantitationTypeByNameException.java | 14 ++ .../quantitationtype/QuantitationTypeDao.java | 20 ++- .../QuantitationTypeDaoImpl.java | 23 +++- .../QuantitationTypeService.java | 25 ++-- .../QuantitationTypeServiceImpl.java | 21 ++- .../service/genome/gene/GeneSetDaoImpl.java | 2 +- .../QuantitationTypeDaoTest.java | 122 ++++++++++++++++-- .../rest/util/args/QuantitationTypeArg.java | 8 ++ .../util/args/QuantitationTypeArgService.java | 8 +- .../util/args/QuantitationTypeByIdArg.java | 12 +- .../util/args/QuantitationTypeByNameArg.java | 11 ++ .../gemma/rest/DatasetsWebServiceTest.java | 27 ++-- 12 files changed, 234 insertions(+), 59 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/NonUniqueQuantitationTypeByNameException.java diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/NonUniqueQuantitationTypeByNameException.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/NonUniqueQuantitationTypeByNameException.java new file mode 100644 index 0000000000..0ef8a734aa --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/NonUniqueQuantitationTypeByNameException.java @@ -0,0 +1,14 @@ +package ubic.gemma.persistence.service.common.quantitationtype; + +import org.hibernate.NonUniqueResultException; + +/** + * Exception raised when retrieving a non-unique QT by name. 
+ * @author poirigui + */ +public class NonUniqueQuantitationTypeByNameException extends Exception { + + public NonUniqueQuantitationTypeByNameException( String message, NonUniqueResultException cause ) { + super( message, cause ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDao.java index 13ba312858..d194e2fd61 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDao.java @@ -1,8 +1,8 @@ /* * The Gemma project. - * + * * Copyright (c) 2006-2007 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -20,10 +20,11 @@ import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.common.quantitationtype.QuantitationTypeValueObject; -import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; +import ubic.gemma.model.expression.bioAssayData.DataVector; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.FilteringVoEnabledDao; +import javax.annotation.Nullable; import java.util.Collection; import java.util.List; @@ -32,21 +33,26 @@ */ public interface QuantitationTypeDao extends FilteringVoEnabledDao { + @Nullable + QuantitationType loadByIdAndVectorType( Long id, ExpressionExperiment ee, Class dataVectorType ); List loadByDescription( String description ); /** * Locate a QT associated with the given ee matching the specification of the passed quantitationType, or null if * there isn't one. 
- * - * @return found QT + * + * @return found QT */ + @Nullable QuantitationType find( ExpressionExperiment ee, QuantitationType quantitationType ); /** - * Test if a given quantitation type is used by a given experiment for a given vector type. + * Find a quantitation type by experiment, name and data vector type. + * @throws org.hibernate.NonUniqueResultException if more than one QT with the name and vector type exists */ - boolean existsByExpressionExperimentAndVectorType( QuantitationType quantitationType, ExpressionExperiment ee, Class dataVectorClass ); + @Nullable + QuantitationType findByNameAndVectorType( ExpressionExperiment ee, String name, Class dataVectorType ); List loadValueObjectsWithExpressionExperiment( Collection qts, ExpressionExperiment ee ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java index 60b24916e9..7604ffbdbc 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java @@ -27,6 +27,7 @@ import org.springframework.util.MultiValueMap; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.common.quantitationtype.QuantitationTypeValueObject; +import ubic.gemma.model.expression.bioAssayData.DataVector; import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector; import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; @@ -125,12 +126,24 @@ public QuantitationType find( ExpressionExperiment ee, QuantitationType quantita } @Override - public boolean existsByExpressionExperimentAndVectorType( QuantitationType quantitationType, 
ExpressionExperiment ee, Class dataVectorClass ) { - return ( Boolean ) this.getSessionFactory().getCurrentSession() - .createQuery( "select count(v) > 0 from " + dataVectorClass.getName() + " v " - + "where v.quantitationType = :qt and v.expressionExperiment = :ee" ) - .setParameter( "qt", quantitationType ) + public QuantitationType findByNameAndVectorType( ExpressionExperiment ee, String name, Class dataVectorType ) { + String entityName = getSessionFactory().getClassMetadata( dataVectorType ).getEntityName(); + return ( QuantitationType ) this.getSessionFactory().getCurrentSession() + .createQuery( "select distinct v.quantitationType from " + entityName + " v " + + "where v.expressionExperiment = :ee and v.quantitationType.name = :name" ) .setParameter( "ee", ee ) + .setParameter( "name", name ) + .uniqueResult(); + } + + @Override + public QuantitationType loadByIdAndVectorType( Long id, ExpressionExperiment ee, Class dataVectorType ) { + String entityName = getSessionFactory().getClassMetadata( dataVectorType ).getEntityName(); + return ( QuantitationType ) this.getSessionFactory().getCurrentSession() + .createQuery( "select distinct v.quantitationType from " + entityName + " v " + + "where v.expressionExperiment = :ee and v.quantitationType.id = :id" ) + .setParameter( "ee", ee ) + .setParameter( "id", id ) .uniqueResult(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeService.java index a2463fab69..c66b507901 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeService.java @@ -21,11 +21,12 @@ import org.springframework.security.access.annotation.Secured; import 
ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.common.quantitationtype.QuantitationTypeValueObject; -import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; +import ubic.gemma.model.expression.bioAssayData.DataVector; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.BaseService; import ubic.gemma.persistence.service.FilteringVoEnabledService; +import javax.annotation.Nullable; import java.util.Collection; import java.util.List; @@ -34,22 +35,30 @@ */ public interface QuantitationTypeService extends BaseService, FilteringVoEnabledService { + /** + * Find a quantitation type by ID and vector type. + *

+ * While the QT can be retrieved uniquely by ID, the purpose of this method is to ensure that it also belongs to a + * given expression experiment and data vector type. + */ + @Nullable + QuantitationType loadByIdAndVectorType( Long id, ExpressionExperiment ee, Class dataVectorType ); + /** * Locate a QT associated with the given ee matching the specification of the passed quantitationType, or null if * there isn't one. * - * @return found QT + * @return found QT */ - @Secured({ "GROUP_USER" }) + @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ" }) QuantitationType find( ExpressionExperiment ee, QuantitationType quantitationType ); /** - * Find a quantitation type by ID and vector type. - *

- * While the QT can be retrieved uniquely by ID, the purpose of this method is to ensure that it also belongs to a - * given expression experiment and data vector type. + * @see QuantitationTypeDao#findByNameAndVectorType(ExpressionExperiment, String, Class) + * @throws NonUniqueQuantitationTypeByNameException if more than one QT matches the given name and vector type */ - boolean existsByExpressionExperimentAndVectorType( QuantitationType quantitationType, ExpressionExperiment ee, Class dataVectorType ); + @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ" }) + QuantitationType findByNameAndVectorType( ExpressionExperiment ee, String name, Class dataVectorType ) throws NonUniqueQuantitationTypeByNameException; @Override @Secured({ "GROUP_USER" }) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeServiceImpl.java index c71d77cd1e..f95d139c89 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeServiceImpl.java @@ -18,12 +18,13 @@ */ package ubic.gemma.persistence.service.common.quantitationtype; +import org.hibernate.NonUniqueResultException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.common.quantitationtype.QuantitationTypeValueObject; -import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; +import ubic.gemma.model.expression.bioAssayData.DataVector; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import 
ubic.gemma.persistence.service.AbstractFilteringVoEnabledService; @@ -36,8 +37,7 @@ * @see QuantitationTypeService */ @Service -public class QuantitationTypeServiceImpl extends AbstractFilteringVoEnabledService - implements QuantitationTypeService { +public class QuantitationTypeServiceImpl extends AbstractFilteringVoEnabledService implements QuantitationTypeService { private final QuantitationTypeDao quantitationTypeDao; @@ -59,6 +59,12 @@ public List loadValueObjectsWithExpressionExperimen return this.quantitationTypeDao.loadValueObjectsWithExpressionExperiment( qts, expressionExperiment ); } + @Override + @Transactional(readOnly = true) + public QuantitationType loadByIdAndVectorType( Long id, ExpressionExperiment ee, Class dataVectorType ) { + return quantitationTypeDao.loadByIdAndVectorType( id, ee, dataVectorType ); + } + @Override @Transactional(readOnly = true) public QuantitationType find( ExpressionExperiment ee, QuantitationType quantitationType ) { @@ -67,8 +73,11 @@ public QuantitationType find( ExpressionExperiment ee, QuantitationType quantita @Override @Transactional(readOnly = true) - public boolean existsByExpressionExperimentAndVectorType( QuantitationType quantitationType, ExpressionExperiment ee, Class dataVectorType ) { - return this.quantitationTypeDao.existsByExpressionExperimentAndVectorType( quantitationType, ee, dataVectorType ); + public QuantitationType findByNameAndVectorType( ExpressionExperiment ee, String name, Class dataVectorType ) throws NonUniqueQuantitationTypeByNameException { + try { + return this.quantitationTypeDao.findByNameAndVectorType( ee, name, dataVectorType ); + } catch ( NonUniqueResultException e ) { + throw new NonUniqueQuantitationTypeByNameException( String.format( "More than one QuantitationType uses %s as name in %s for vectors of type %s.", name, ee, dataVectorType ), e ); + } } - } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java index b7e490a1dc..4377285b89 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java @@ -173,7 +173,7 @@ public Collection findByName( String name, @Nullable Taxon taxon ) { StopWatch timer = StopWatch.createStarted(); if ( StringUtils.isBlank( name ) ) return new HashSet<>(); - // slow? would it be faster to just findByName and then restrict taxon? + // slow? would it be faster to just findByNameAndVectorType and then restrict taxon? Query query = this.getSessionFactory().getCurrentSession().createQuery( "select gs from GeneSet gs join gs.members gm join gm.gene g " + "where gs.name like :query " diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoTest.java index 476c740a2e..b9079348ae 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoTest.java @@ -1,20 +1,26 @@ package ubic.gemma.persistence.service.common.quantitationtype; +import org.hibernate.NonUniqueResultException; import org.hibernate.SessionFactory; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.util.test.BaseDatabaseTest; -import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.common.quantitationtype.*; +import 
ubic.gemma.model.expression.arrayDesign.ArrayDesign; +import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; +import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.util.Filter; import ubic.gemma.persistence.util.Filters; -import ubic.gemma.core.context.TestComponent; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; @ContextConfiguration public class QuantitationTypeDaoTest extends BaseDatabaseTest { @@ -39,12 +45,112 @@ public void testLoadValueObjects() { } @Test - public void testExistsByExpressionExperimentAndVectorType() { - QuantitationType qt = new QuantitationType(); - qt.setId( 1L ); + public void testLoadByIdAndVectorType() { + ArrayDesign ad = createPlatform(); + + QuantitationType qt = createQuantitationType( "test" ); + ExpressionExperiment ee = new ExpressionExperiment(); + + BioAssayDimension bad = new BioAssayDimension(); + sessionFactory.getCurrentSession().persist( bad ); + RawExpressionDataVector vector = new RawExpressionDataVector(); + vector.setBioAssayDimension( bad ); + vector.setDesignElement( ad.getCompositeSequences().iterator().next() ); + vector.setQuantitationType( qt ); + vector.setData( new byte[0] ); + vector.setExpressionExperiment( ee ); + + ee.getQuantitationTypes().add( qt ); + ee.getRawExpressionDataVectors().add( vector ); + + sessionFactory.getCurrentSession().persist( ee ); + assertThat( quantitationTypeDao.loadByIdAndVectorType( qt.getId(), ee, RawExpressionDataVector.class ) ) + .isEqualTo( qt ); + } + + @Test + public void testFindByNameAndVectorType() { + ArrayDesign ad = createPlatform(); + + ExpressionExperiment ee = new ExpressionExperiment(); + + BioAssayDimension bad = new BioAssayDimension(); + 
sessionFactory.getCurrentSession().persist( bad ); + + QuantitationType qt = createQuantitationType( "test" ); + RawExpressionDataVector vector = new RawExpressionDataVector(); + vector.setBioAssayDimension( bad ); + vector.setDesignElement( ad.getCompositeSequences().iterator().next() ); + vector.setQuantitationType( qt ); + vector.setData( new byte[0] ); + vector.setExpressionExperiment( ee ); + + ee.getQuantitationTypes().add( qt ); + ee.getRawExpressionDataVectors().add( vector ); + + sessionFactory.getCurrentSession().persist( ee ); + + assertThat( quantitationTypeDao.findByNameAndVectorType( ee, "test", RawExpressionDataVector.class ) ) + .isEqualTo( qt ); + } + + @Test + public void testFindByNameWhenNameIsNonUnique() { + ArrayDesign ad = createPlatform(); + ExpressionExperiment ee = new ExpressionExperiment(); - ee.setId( 1L ); - assertThat( quantitationTypeDao.existsByExpressionExperimentAndVectorType( qt, ee, RawExpressionDataVector.class ) ) - .isFalse(); + + BioAssayDimension bad = new BioAssayDimension(); + sessionFactory.getCurrentSession().persist( bad ); + + QuantitationType qt = createQuantitationType( "test" ); + RawExpressionDataVector vector = new RawExpressionDataVector(); + vector.setBioAssayDimension( bad ); + vector.setDesignElement( ad.getCompositeSequences().iterator().next() ); + vector.setQuantitationType( qt ); + vector.setData( new byte[0] ); + vector.setExpressionExperiment( ee ); + + ee.getQuantitationTypes().add( qt ); + ee.getRawExpressionDataVectors().add( vector ); + + QuantitationType qt2 = createQuantitationType( "test" ); + RawExpressionDataVector vector2 = new RawExpressionDataVector(); + vector2.setBioAssayDimension( bad ); + vector2.setDesignElement( ad.getCompositeSequences().iterator().next() ); + vector2.setQuantitationType( qt2 ); + vector2.setData( new byte[0] ); + vector2.setExpressionExperiment( ee ); + + ee.getQuantitationTypes().add( qt2 ); + ee.getRawExpressionDataVectors().add( vector2 ); + + 
sessionFactory.getCurrentSession().persist( ee ); + + assertThatThrownBy( () -> quantitationTypeDao.findByNameAndVectorType( ee, "test", RawExpressionDataVector.class ) ) + .isInstanceOf( NonUniqueResultException.class ); + } + + private ArrayDesign createPlatform() { + Taxon taxon = new Taxon(); + sessionFactory.getCurrentSession().persist( taxon ); + ArrayDesign ad = new ArrayDesign(); + ad.setPrimaryTaxon( taxon ); + CompositeSequence cs = new CompositeSequence(); + cs.setArrayDesign( ad ); + ad.getCompositeSequences().add( cs ); + sessionFactory.getCurrentSession().persist( ad ); + return ad; + } + + private QuantitationType createQuantitationType( String name ) { + QuantitationType newQt = new QuantitationType(); + newQt.setName( name ); + newQt.setGeneralType( GeneralType.QUANTITATIVE ); + newQt.setType( StandardQuantitationType.AMOUNT ); + newQt.setScale( ScaleType.LOG2 ); + newQt.setRepresentation( PrimitiveType.DOUBLE ); + sessionFactory.getCurrentSession().persist( newQt ); + return newQt; } } \ No newline at end of file diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArg.java index 541eedcf85..7f93656580 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArg.java @@ -6,6 +6,8 @@ import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; +import javax.annotation.Nullable; + @Schema(oneOf = { QuantitationTypeByIdArg.class, QuantitationTypeByNameArg.class }) public abstract class QuantitationTypeArg extends AbstractEntityArg { @@ -13,6 +15,12 @@ protected QuantitationTypeArg( String propertyName, Class propertyType, T val super( propertyName, propertyType, value ); } + /** + * Obtain a QT for a specific experiment and vector type. 
+ */ + @Nullable + abstract QuantitationType getEntity( ExpressionExperiment ee, QuantitationTypeService service, Class dataVectorType ); + public static QuantitationTypeArg valueOf( String s ) { try { return new QuantitationTypeByIdArg( Long.parseLong( s ) ); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArgService.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArgService.java index bb703a0935..eeab24bfbe 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArgService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeArgService.java @@ -16,12 +16,6 @@ public QuantitationTypeArgService( QuantitationTypeService service ) { } public QuantitationType getEntity( QuantitationTypeArg quantitationTypeArg, ExpressionExperiment ee, Class vectorType ) { - QuantitationType quantitationType = getEntity( quantitationTypeArg ); - if ( service.existsByExpressionExperimentAndVectorType( quantitationType, ee, vectorType ) ) { - return quantitationType; - } else { - // will raise a 404 error - return checkEntity( quantitationTypeArg, null ); - } + return checkEntity( quantitationTypeArg, quantitationTypeArg.getEntity( ee, service, vectorType ) ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByIdArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByIdArg.java index 51556e0228..6bac6506f3 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByIdArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByIdArg.java @@ -2,10 +2,11 @@ import io.swagger.v3.oas.annotations.media.Schema; import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; import 
ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; -import javax.ws.rs.BadRequestException; -import javax.ws.rs.NotFoundException; +import javax.annotation.Nullable; @Schema(type = "integer", format = "int64", description = "A quantitation type ID.") public class QuantitationTypeByIdArg extends QuantitationTypeArg { @@ -14,8 +15,13 @@ public class QuantitationTypeByIdArg extends QuantitationTypeArg { } @Override - QuantitationType getEntity( QuantitationTypeService service ) throws NotFoundException, BadRequestException { + QuantitationType getEntity( QuantitationTypeService service ) { return service.load( getValue() ); } + @Nullable + @Override + QuantitationType getEntity( ExpressionExperiment ee, QuantitationTypeService service, Class dataVectorType ) { + return service.loadByIdAndVectorType( getValue(), ee, dataVectorType ); + } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByNameArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByNameArg.java index 0aff7afeca..9fa108aa2d 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByNameArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QuantitationTypeByNameArg.java @@ -2,6 +2,9 @@ import io.swagger.v3.oas.annotations.media.Schema; import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.persistence.service.common.quantitationtype.NonUniqueQuantitationTypeByNameException; import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; import javax.ws.rs.BadRequestException; @@ -18,4 +21,12 @@ QuantitationType getEntity( QuantitationTypeService service ) throws BadRequestE throw new UnsupportedOperationException( "A name is insufficient to retrieve a unique quantitation type." 
); } + @Override + QuantitationType getEntity( ExpressionExperiment ee, QuantitationTypeService service, Class dataVectorType ) { + try { + return service.findByNameAndVectorType( ee, getValue(), dataVectorType ); + } catch ( NonUniqueQuantitationTypeByNameException e ) { + throw new BadRequestException( "More than one quantitation type uses the given name. Use a numerical ID instead.", e ); + } + } } diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 2c8acd2286..cd5d5439b5 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -40,7 +40,10 @@ import ubic.gemma.persistence.service.expression.bioAssay.BioAssayService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; -import ubic.gemma.persistence.util.*; +import ubic.gemma.persistence.util.Filter; +import ubic.gemma.persistence.util.Filters; +import ubic.gemma.persistence.util.Slice; +import ubic.gemma.persistence.util.Sort; import ubic.gemma.rest.analytics.AnalyticsProvider; import ubic.gemma.rest.util.BaseJerseyTest; import ubic.gemma.rest.util.JacksonConfig; @@ -217,7 +220,7 @@ public void setUpMocks() throws TimeoutException { } @After - public void resetMocks() throws Exception { + public void resetMocks() { reset( expressionExperimentService, quantitationTypeService, analyticsProvider, expressionDataFileService ); } @@ -339,9 +342,7 @@ public void testGetDatasetsWhenInferenceTimeoutThenProduce503ServiceUnavailable( .thenReturn( new Slice<>( Collections.emptyList(), null, null, null, null ) ); assertThat( target( "/datasets" ).queryParam( "filter", "allCharacteristic.valueUri in (a, b, c)" ).request().get() ) .hasStatus( Response.Status.SERVICE_UNAVAILABLE ) - 
.hasHeaderSatisfying( "Retry-After", values -> { - assertThat( values ).isNotEmpty(); - } ) + .hasHeaderSatisfying( "Retry-After", values -> assertThat( values ).isNotEmpty() ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); } @@ -431,7 +432,7 @@ public void testGetDatasetsAnnotationsForUncategorizedTerms() { } @Test - public void testGetDatasetsCategories() throws SearchException { + public void testGetDatasetsCategories() { assertThat( target( "/datasets/categories" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); @@ -461,7 +462,7 @@ public void testGetDatasetProcessedExpression() throws IOException { } @Test - public void testGetDatasetProcessedExpressionWhenNoProcessedVectorsExist() throws IOException { + public void testGetDatasetProcessedExpressionWhenNoProcessedVectorsExist() { when( expressionExperimentService.hasProcessedExpressionData( eq( ee ) ) ).thenReturn( false ); assertThat( target( "/datasets/1/data/processed" ).request().get() ) .hasStatus( Response.Status.NOT_FOUND ) @@ -487,15 +488,14 @@ public void testGetDatasetRawExpression() throws IOException { } @Test - public void testGetDatasetRawExpressionByQuantitationTypeWhenQtIsNotFromTheDataset() throws IOException { + public void testGetDatasetRawExpressionByQuantitationTypeWhenQtIsNotFromTheDataset() { QuantitationType qt = QuantitationType.Factory.newInstance(); qt.setId( 12L ); when( quantitationTypeService.load( 12L ) ).thenReturn( qt ); - when( quantitationTypeService.existsByExpressionExperimentAndVectorType( qt, ee, RawExpressionDataVector.class ) ).thenReturn( false ); + when( quantitationTypeService.loadByIdAndVectorType( 12L, ee, RawExpressionDataVector.class ) ).thenReturn( null ); Response res = target( "/datasets/1/data/raw" ) .queryParam( "quantitationType", "12" ).request().get(); - verify( quantitationTypeService ).load( 12L ); - verify( quantitationTypeService 
).existsByExpressionExperimentAndVectorType( qt, ee, RawExpressionDataVector.class ); + verify( quantitationTypeService ).loadByIdAndVectorType( 12L, ee, RawExpressionDataVector.class ); verifyNoInteractions( expressionDataFileService ); assertThat( res ) .hasStatus( Response.Status.NOT_FOUND ) @@ -507,11 +507,10 @@ public void testGetDatasetRawExpressionByQuantitationType() throws IOException { QuantitationType qt = QuantitationType.Factory.newInstance(); qt.setId( 12L ); when( quantitationTypeService.load( 12L ) ).thenReturn( qt ); - when( quantitationTypeService.existsByExpressionExperimentAndVectorType( qt, ee, RawExpressionDataVector.class ) ).thenReturn( true ); + when( quantitationTypeService.loadByIdAndVectorType( 12L, ee, RawExpressionDataVector.class ) ).thenReturn( qt ); Response res = target( "/datasets/1/data/raw" ) .queryParam( "quantitationType", "12" ).request().get(); - verify( quantitationTypeService ).load( 12L ); - verify( quantitationTypeService ).existsByExpressionExperimentAndVectorType( qt, ee, RawExpressionDataVector.class ); + verify( quantitationTypeService ).loadByIdAndVectorType( 12L, ee, RawExpressionDataVector.class ); verify( expressionDataFileService ).writeRawExpressionData( eq( ee ), eq( qt ), any() ); assertThat( res ).hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ) From e0bf64b3560fea727c82e84271d155f8ebed0767 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 15:27:34 -0700 Subject: [PATCH 14/81] Ignore private gene sets in GeneSetSearchService (fix #1150) --- .../genome/gene/GeneSearchServiceImpl.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSearchServiceImpl.java index 5f358072bc..a4c5a23180 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSearchServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSearchServiceImpl.java @@ -215,7 +215,15 @@ public Collection searchGenesAndGeneGroups( String qu // convert result object to a value object List> dbsgvo = taxonCheckedSets.stream() .filter( Objects::nonNull ) - .map( sr -> sr.withResultObject( geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ) ) + .map( sr -> { + try { + return sr.withResultObject( geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ); + } catch ( AccessDeniedException e ) { + // ignore gene sets current user is not allowed to see + return null; + } + } ) + .filter( Objects::nonNull ) .collect( Collectors.toList() ); geneSets = SearchResultDisplayObject.convertSearchResults2SearchResultDisplayObjects( dbsgvo ); @@ -241,7 +249,13 @@ public Collection searchGenesAndGeneGroups( String qu isSetOwnedByUser.put( gs.getId(), securityService.isOwnedByCurrentUser( gs ) ); taxon = geneSetService.getTaxon( gs ); - GeneSetValueObject gsVo = geneSetValueObjectHelper.convertToValueObject( gs ); + GeneSetValueObject gsVo; + try { + gsVo = geneSetValueObjectHelper.convertToValueObject( gs ); + } catch ( AccessDeniedException e ) { + // ignore gene sets current user is not allowed to see + continue; + } srDo = new SearchResultDisplayObject( gsVo ); if ( taxon != null ) { srDo.setTaxonId( taxon.getId() ); From 13e7014daacbcfe1a178625c8a82be3e186665dd Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 15:31:56 -0700 Subject: [PATCH 15/81] Fix incorrect security config attribute for getRanks() --- .../bioAssayData/ProcessedExpressionDataVectorService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorService.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorService.java index 156d607e7f..da2f372752 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorService.java @@ -183,7 +183,7 @@ Collection getProcessedDataArraysByProbeIds( BioAssaySe Collection getProcessedDataVectorsAndThaw( ExpressionExperiment expressionExperiment ); - @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_AFTER_MAP_READ", "ACL_SECURABLE_COLLECTION_READ" }) + @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_COLLECTION_READ" }) Map>> getRanks( Collection expressionExperiments, Collection genes, ProcessedExpressionDataVectorDao.RankMethod method ); From 36b08fe3103421d57c7dc357bf3efaa98ee6113f Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 15:51:22 -0700 Subject: [PATCH 16/81] Make convertToEntity public for testing purposes --- .../experiment/ExpressionExperimentSetValueObjectHelper.java | 2 ++ .../ExpressionExperimentSetValueObjectHelperImpl.java | 2 +- .../ExpressionExperimentSetValueObjectHelperTest.java | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java index fa50ed2c3e..c7fe65f052 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelper.java @@ -57,4 +57,6 @@ public interface ExpressionExperimentSetValueObjectHelper { * Delete the 
experiment set corresponding to the given VO. */ void delete( ExpressionExperimentSetValueObject eesvo ); + + ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObject eesvo ); } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java index b669f7a9be..9225a59481 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperImpl.java @@ -238,7 +238,7 @@ public void delete( ExpressionExperimentSetValueObject eesvo ) { * @param setVO if null, returns null * @return ee set */ - ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObject setVO ) { + public ExpressionExperimentSet convertToEntity( ExpressionExperimentSetValueObject setVO ) { if ( setVO == null ) { return null; } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java index ccef4355b3..7b7c1ef187 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentSetValueObjectHelperTest.java @@ -53,7 +53,7 @@ public class ExpressionExperimentSetValueObjectHelperTest extends BaseSpringCont private ExpressionExperimentSetService expressionExperimentSetService; @Autowired - private ExpressionExperimentSetValueObjectHelperImpl 
expressionExperimentSetValueObjectHelper; + private ExpressionExperimentSetValueObjectHelper expressionExperimentSetValueObjectHelper; private ExpressionExperiment ee = null; private ExpressionExperimentSet eeSet = null; From ffc1ce508fe25e8b980610ea124fc3aee8306a8e Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Tue, 18 Jun 2024 14:51:25 -0700 Subject: [PATCH 17/81] minor --- .../gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java index 975041e6e8..4d71167a44 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java @@ -77,7 +77,7 @@ public String getShortDesc() { protected void buildOptions( Options options ) { super.buildOptions( options ); Option arrayDesignOption = Option.builder( "a" ).hasArg().argName( "Array design" ).desc( - "Array design name (or short name) - no need to specifiy if the platforms used by the EE are merged" ) + "Array design short name to be switched to - no need to specify if the platforms used by the EE are merged" ) .longOpt( "array" ).build(); options.addOption( arrayDesignOption ); this.addForceOption( options ); @@ -115,7 +115,6 @@ private void processExperiment( ExpressionExperiment ee ) { /** * @param name of the array design to find. - * @param arrayDesignService the arrayDesignService to use for the AD retrieval * @return an array design, if found. 
Bails otherwise with an error exit code */ private ArrayDesign locateArrayDesign( String name ) { From 24f2d1ac1086514ee697de583d3f57d87d3eb0c1 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 17 Jun 2024 16:52:45 -0700 Subject: [PATCH 18/81] Remove Eclipse configurations --- gemma-web/.project | 82 ------------------- gemma-web/.settings/.jsdtscope | 13 --- .../com.eclipsesource.jshint.ui.prefs | 3 - .../.settings/org.eclipse.jdt.core.prefs | 15 ---- gemma-web/.settings/org.eclipse.m2e.wtp.prefs | 2 - ...se.wst.common.project.facet.core.prefs.xml | 7 -- ....eclipse.wst.common.project.facet.core.xml | 8 -- .../.settings/org.eclipse.wst.jsdt.core.prefs | 10 --- .../.settings/org.eclipse.wst.jsdt.ui.prefs | 2 - ...rg.eclipse.wst.jsdt.ui.superType.container | 1 - .../org.eclipse.wst.jsdt.ui.superType.name | 1 - .../org.eclipse.wst.validation.prefs | 2 - 12 files changed, 146 deletions(-) delete mode 100644 gemma-web/.project delete mode 100644 gemma-web/.settings/.jsdtscope delete mode 100644 gemma-web/.settings/com.eclipsesource.jshint.ui.prefs delete mode 100644 gemma-web/.settings/org.eclipse.jdt.core.prefs delete mode 100644 gemma-web/.settings/org.eclipse.m2e.wtp.prefs delete mode 100644 gemma-web/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml delete mode 100644 gemma-web/.settings/org.eclipse.wst.common.project.facet.core.xml delete mode 100644 gemma-web/.settings/org.eclipse.wst.jsdt.core.prefs delete mode 100644 gemma-web/.settings/org.eclipse.wst.jsdt.ui.prefs delete mode 100644 gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.container delete mode 100644 gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.name delete mode 100644 gemma-web/.settings/org.eclipse.wst.validation.prefs diff --git a/gemma-web/.project b/gemma-web/.project deleted file mode 100644 index 484a2d7bda..0000000000 --- a/gemma-web/.project +++ /dev/null @@ -1,82 +0,0 @@ - - - gemma-web - - - - - - org.eclipse.wst.jsdt.core.javascriptValidator - - 
- - - org.eclipse.wst.common.project.facet.core.builder - - - - - org.eclipse.jdt.core.javabuilder - - - - - org.eclipse.wst.validation.validationbuilder - - - - - org.eclipse.ui.externaltools.ExternalToolBuilder - full,incremental, - - - LaunchConfigHandle - <project>/.externalToolBuilders/com.eclipsesource.jshint.ui.builder.launch - - - - - org.eclipse.ui.externaltools.ExternalToolBuilder - full,incremental, - - - LaunchConfigHandle - <project>/.externalToolBuilders/net.vtst.ow.eclipse.js.closure.closureBuilder.launch - - - - - org.eclipse.ui.externaltools.ExternalToolBuilder - full,incremental, - - - LaunchConfigHandle - <project>/.externalToolBuilders/org.springframework.ide.eclipse.core.springbuilder.launch - - - - - org.eclipse.ui.externaltools.ExternalToolBuilder - full,incremental, - - - LaunchConfigHandle - <project>/.externalToolBuilders/org.springframework.ide.eclipse.boot.validation.springbootbuilder.launch - - - - - org.eclipse.m2e.core.maven2Builder - - - - - - org.eclipse.jem.workbench.JavaEMFNature - org.eclipse.wst.common.modulecore.ModuleCoreNature - org.eclipse.jdt.core.javanature - org.eclipse.m2e.core.maven2Nature - org.eclipse.wst.common.project.facet.core.nature - org.eclipse.wst.jsdt.core.jsNature - - diff --git a/gemma-web/.settings/.jsdtscope b/gemma-web/.settings/.jsdtscope deleted file mode 100644 index ab0fb3d61b..0000000000 --- a/gemma-web/.settings/.jsdtscope +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - diff --git a/gemma-web/.settings/com.eclipsesource.jshint.ui.prefs b/gemma-web/.settings/com.eclipsesource.jshint.ui.prefs deleted file mode 100644 index a983aa51ed..0000000000 --- a/gemma-web/.settings/com.eclipsesource.jshint.ui.prefs +++ /dev/null @@ -1,3 +0,0 @@ -eclipse.preferences.version=1 -excluded=src/main/webapp/scripts/cytoscape//*.js\:src/main/webapp/scripts/lib//*.js -included=src/main/webapp/scripts//*.js diff --git a/gemma-web/.settings/org.eclipse.jdt.core.prefs 
b/gemma-web/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index db24ee78c3..0000000000 --- a/gemma-web/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,15 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 -org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve -org.eclipse.jdt.core.compiler.compliance=1.8 -org.eclipse.jdt.core.compiler.debug.lineNumber=generate -org.eclipse.jdt.core.compiler.debug.localVariable=generate -org.eclipse.jdt.core.compiler.debug.sourceFile=generate -org.eclipse.jdt.core.compiler.problem.assertIdentifier=error -org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled -org.eclipse.jdt.core.compiler.problem.enumIdentifier=error -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore -org.eclipse.jdt.core.compiler.release=disabled -org.eclipse.jdt.core.compiler.source=1.8 diff --git a/gemma-web/.settings/org.eclipse.m2e.wtp.prefs b/gemma-web/.settings/org.eclipse.m2e.wtp.prefs deleted file mode 100644 index ef86089622..0000000000 --- a/gemma-web/.settings/org.eclipse.m2e.wtp.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.m2e.wtp.enabledProjectSpecificPrefs=false diff --git a/gemma-web/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml b/gemma-web/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml deleted file mode 100644 index cc8138509b..0000000000 --- a/gemma-web/.settings/org.eclipse.wst.common.project.facet.core.prefs.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/gemma-web/.settings/org.eclipse.wst.common.project.facet.core.xml b/gemma-web/.settings/org.eclipse.wst.common.project.facet.core.xml deleted file mode 100644 index 7b4cd6729d..0000000000 --- a/gemma-web/.settings/org.eclipse.wst.common.project.facet.core.xml +++ /dev/null @@ -1,8 
+0,0 @@ - - - - - - - - diff --git a/gemma-web/.settings/org.eclipse.wst.jsdt.core.prefs b/gemma-web/.settings/org.eclipse.wst.jsdt.core.prefs deleted file mode 100644 index 34c375d6a6..0000000000 --- a/gemma-web/.settings/org.eclipse.wst.jsdt.core.prefs +++ /dev/null @@ -1,10 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.wst.jsdt.core.compiler.codegen.inlineJsrBytecode=disabled -org.eclipse.wst.jsdt.core.compiler.codegen.targetPlatform=1.2 -org.eclipse.wst.jsdt.core.compiler.codegen.unusedLocal=preserve -org.eclipse.wst.jsdt.core.compiler.compliance=1.4 -org.eclipse.wst.jsdt.core.compiler.debug.lineNumber=generate -org.eclipse.wst.jsdt.core.compiler.debug.localVariable=generate -org.eclipse.wst.jsdt.core.compiler.debug.sourceFile=generate -org.eclipse.wst.jsdt.core.compiler.problem.assertIdentifier=warning -org.eclipse.wst.jsdt.core.compiler.source=1.3 diff --git a/gemma-web/.settings/org.eclipse.wst.jsdt.ui.prefs b/gemma-web/.settings/org.eclipse.wst.jsdt.ui.prefs deleted file mode 100644 index 6bf5ec1a0a..0000000000 --- a/gemma-web/.settings/org.eclipse.wst.jsdt.ui.prefs +++ /dev/null @@ -1,2 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.wst.jsdt.ui.text.custom_code_templates= diff --git a/gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.container b/gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.container deleted file mode 100644 index 3bd5d0a480..0000000000 --- a/gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.container +++ /dev/null @@ -1 +0,0 @@ -org.eclipse.wst.jsdt.launching.baseBrowserLibrary \ No newline at end of file diff --git a/gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.name b/gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.name deleted file mode 100644 index 05bd71b6ec..0000000000 --- a/gemma-web/.settings/org.eclipse.wst.jsdt.ui.superType.name +++ /dev/null @@ -1 +0,0 @@ -Window \ No newline at end of file diff --git a/gemma-web/.settings/org.eclipse.wst.validation.prefs 
b/gemma-web/.settings/org.eclipse.wst.validation.prefs deleted file mode 100644 index 04cad8cb75..0000000000 --- a/gemma-web/.settings/org.eclipse.wst.validation.prefs +++ /dev/null @@ -1,2 +0,0 @@ -disabled=06target -eclipse.preferences.version=1 From 674000be4a7299e03e974f2cea10be81e8d72448 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 10:03:26 -0700 Subject: [PATCH 19/81] rest: Allow gzip-base64 compression for all subclasses of AbstractArrayArg (fix #1154) Improve array schema descriptions by including minItems and base64-gzip encoding description wherever applicable. Do not allow compression for TaxonArrayArg and ExcludeArg. Remove unused FactorValueArrayArg. --- .../java/ubic/gemma/rest/util/ArgUtils.java | 3 +- .../rest/util/args/AbstractArrayArg.java | 55 +++++++++++++++++-- .../util/args/CompositeSequenceArrayArg.java | 35 ++++-------- .../rest/util/args/DatabaseEntryArrayArg.java | 13 ++--- .../gemma/rest/util/args/DatasetArrayArg.java | 24 ++------ .../ubic/gemma/rest/util/args/ExcludeArg.java | 8 +-- .../rest/util/args/FactorValueArrayArg.java | 21 ------- .../ubic/gemma/rest/util/args/FilterArg.java | 8 ++- .../gemma/rest/util/args/GeneArrayArg.java | 27 ++------- .../rest/util/args/PlatformArrayArg.java | 24 ++------ .../gemma/rest/util/args/StringArrayArg.java | 28 ++-------- .../gemma/rest/util/args/TaxonArrayArg.java | 26 ++------- 12 files changed, 101 insertions(+), 171 deletions(-) delete mode 100644 gemma-rest/src/main/java/ubic/gemma/rest/util/args/FactorValueArrayArg.java diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/ArgUtils.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/ArgUtils.java index 017d14c282..2df076c4df 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/ArgUtils.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/ArgUtils.java @@ -26,6 +26,7 @@ /** * Utilities for working with {@link Arg}. 
+ * * @author poirigui */ public class ArgUtils { @@ -46,7 +47,7 @@ public static String decodeCompressedArg( @Nullable String s ) { try { return IOUtils.toString( new GZIPInputStream( new ByteArrayInputStream( decodedS ) ), StandardCharsets.UTF_8 ); } catch ( IOException e ) { - throw new MalformedArgException( "Invalid base64-encoded filter, make sure that your filter is first gzipped and then base64-encoded.", e ); + throw new IllegalArgumentException( "Invalid base64-encoded filter, make sure that your filter is first gzipped and then base64-encoded.", e ); } } else { return s; diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/AbstractArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/AbstractArrayArg.java index 6b8b94130b..42bbf4d832 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/AbstractArrayArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/AbstractArrayArg.java @@ -1,34 +1,77 @@ package ubic.gemma.rest.util.args; +import org.apache.commons.lang3.StringUtils; +import ubic.gemma.rest.util.ArgUtils; +import ubic.gemma.rest.util.MalformedArgException; + import java.util.Arrays; import java.util.List; +import java.util.function.Function; + +import static ubic.gemma.rest.util.ArgUtils.decodeCompressedArg; /** - * Class representing an API argument that should be an array. + * Represents a comma-delimited array API argument. *

* If you use this alongside a {@link javax.ws.rs.QueryParam}, make sure that you include a {@link io.swagger.v3.oas.annotations.Parameter} * with the 'explode' attribute set to {@link io.swagger.v3.oas.annotations.enums.Explode#FALSE}, otherwise the * serialization will not be correct. * + * @param the type of elements the array contains * @author tesarst */ public abstract class AbstractArrayArg extends AbstractArg> { - protected static final String ERROR_MSG = "Value '%s' can not converted to an array of "; + /** + * Prefix to use to describe the array schema in subclasses. + * + * @see io.swagger.v3.oas.annotations.media.ArraySchema + */ + public static final String ARRAY_SCHEMA_DESCRIPTION_PREFIX = "A comma-delimited list of "; + + /** + * A description of the base64-gzip encoding to use in array schema descriptions in subclasses. + * + * @see io.swagger.v3.oas.annotations.media.ArraySchema + */ + public static final String ARRAY_SCHEMA_COMPRESSION_DESCRIPTION = "The value may be compressed with gzip and encoded with base64."; protected AbstractArrayArg( List values ) { super( values ); } /** + * Evaluate an input array argument. + *

* Split a string by the ',' comma character and trim the resulting pieces. - * + *

* This is meant to be used for parsing query arguments that use a comma as a delimiter. * - * @param arg the string to process + * @param arg the string to process + * @param ofWhat a description of what is expected + * @param func a function to convert the resulting list of string to the specific array argument + * @param decompressArg decompress the argument as per {@link ArgUtils#decodeCompressedArg(String)} * @return trimmed strings exploded from the input. + * @throws MalformedArgException wrapping any raised {@link IllegalArgumentException} which may be caused by an + * empty string, an invalid base64-gzip encoded input or such an exception raised by + * the passed function */ - protected static List splitAndTrim( String arg ) { - return Arrays.asList( arg.split( "\\s*,\\s*" ) ); + protected static > T valueOf( final String arg, String ofWhat, Function, T> func, boolean decompressArg ) throws MalformedArgException { + String val = "'" + arg + "'"; + try { + String decompressedArg; + if ( decompressArg ) { + decompressedArg = decodeCompressedArg( arg ); + val = "'" + decompressedArg + "' (decompressed from " + val + ")"; + } else { + decompressedArg = arg; + } + if ( StringUtils.isBlank( decompressedArg ) ) { + throw new IllegalArgumentException( String.format( "Provide a value that contains at least one or multiple %s separated by commas.", ofWhat ) ); + } + return func.apply( Arrays.asList( decompressedArg.split( "\\s*,\\s*" ) ) ); + } catch ( IllegalArgumentException e ) { + throw new MalformedArgException( String.format( "Value '%s' can not converted to an array of %s.", val, ofWhat ), e ); + } } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/CompositeSequenceArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/CompositeSequenceArrayArg.java index 04fdca3f5a..417ce2b0b7 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/CompositeSequenceArrayArg.java +++ 
b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/CompositeSequenceArrayArg.java @@ -2,7 +2,6 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceDao; @@ -12,13 +11,15 @@ import java.util.List; -@ArraySchema(schema = @Schema(implementation = CompositeSequenceArg.class)) +@ArraySchema(arraySchema = @Schema(description = CompositeSequenceArrayArg.ARRAY_SCHEMA_DESCRIPTION), schema = @Schema(implementation = CompositeSequenceArg.class), minItems = 1) public class CompositeSequenceArrayArg extends AbstractEntityArrayArg { - private static final String ERROR_MSG_DETAIL = "Provide a string that contains at least one " - + "element ID or name, or multiple, separated by (',') character. " - + "All identifiers must be same type, i.e. do not combine IDs and names in one query."; - private static final String ERROR_MSG = AbstractArrayArg.ERROR_MSG + " Element identifiers"; + + public static final String OF_WHAT = "element IDs or names"; + + public static final String ARRAY_SCHEMA_DESCRIPTION = + AbstractArrayArg.ARRAY_SCHEMA_DESCRIPTION_PREFIX + CompositeSequenceArrayArg.OF_WHAT + ". " + + AbstractArrayArg.ARRAY_SCHEMA_COMPRESSION_DESCRIPTION; private ArrayDesign arrayDesign; @@ -26,24 +27,6 @@ private CompositeSequenceArrayArg( List values ) { super( CompositeSequenceArg.class, values ); } - /** - * Used by RS to parse value of request parameters. 
- * - * @param s the request arrayCompositeSequence argument - * @return an instance of ArrayCompositeSequenceArg representing an array of CompositeSequence identifiers from the - * input string, or a malformed ArrayCompositeSequenceArg that will throw an {@link javax.ws.rs.BadRequestException} - * when accessing its value, if the input String can not be converted into an array of CompositeSequence - * identifiers. - */ - @SuppressWarnings("unused") - public static CompositeSequenceArrayArg valueOf( final String s ) throws MalformedArgException { - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( CompositeSequenceArrayArg.ERROR_MSG, s ), - new IllegalArgumentException( CompositeSequenceArrayArg.ERROR_MSG_DETAIL ) ); - } - return new CompositeSequenceArrayArg( splitAndTrim( s ) ); - } - public void setPlatform( ArrayDesign arrayDesign ) { this.arrayDesign = arrayDesign; } @@ -51,4 +34,8 @@ public void setPlatform( ArrayDesign arrayDesign ) { public Filter getPlatformFilter() { return Filter.parse( CompositeSequenceDao.OBJECT_ALIAS, "arrayDesign.id", Long.class, Filter.Operator.eq, this.arrayDesign.getId().toString() ); } + + public static CompositeSequenceArrayArg valueOf( final String s ) throws MalformedArgException { + return valueOf( s, OF_WHAT, CompositeSequenceArrayArg::new, true ); + } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatabaseEntryArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatabaseEntryArrayArg.java index 5f5bd4b219..f48c3fdc8e 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatabaseEntryArrayArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatabaseEntryArrayArg.java @@ -2,28 +2,23 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; import ubic.gemma.model.common.description.DatabaseEntry; import 
ubic.gemma.persistence.service.common.description.DatabaseEntryService; import ubic.gemma.rest.util.MalformedArgException; import java.util.List; -@ArraySchema(schema = @Schema(implementation = DatabaseEntryArg.class)) +@ArraySchema(arraySchema = @Schema(description = DatabaseEntryArrayArg.ARRAY_SCHEMA_DESCRIPTION), schema = @Schema(implementation = DatabaseEntryArg.class), minItems = 1) public class DatabaseEntryArrayArg extends AbstractEntityArrayArg { - private static final String ERROR_MSG_DETAIL = "Provide a string that contains at least one ID or short name, or multiple, separated by (',') character. All identifiers must be same type, i.e. do not combine IDs and short names."; - private static final String ERROR_MSG = AbstractArrayArg.ERROR_MSG + " Database entry identifiers"; + public static final String OF_WHAT = "database entry IDs or accessions"; + public static final String ARRAY_SCHEMA_DESCRIPTION = ARRAY_SCHEMA_DESCRIPTION_PREFIX + OF_WHAT + ". " + ARRAY_SCHEMA_COMPRESSION_DESCRIPTION; private DatabaseEntryArrayArg( List values ) { super( DatabaseEntryArg.class, values ); } public static DatabaseEntryArrayArg valueOf( final String s ) throws MalformedArgException { - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( DatabaseEntryArrayArg.ERROR_MSG, s ), - new IllegalArgumentException( DatabaseEntryArrayArg.ERROR_MSG_DETAIL ) ); - } - return new DatabaseEntryArrayArg( splitAndTrim( s ) ); + return valueOf( s, OF_WHAT, DatabaseEntryArrayArg::new, true ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArrayArg.java index c43167631d..92cff21755 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArrayArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArrayArg.java @@ -2,38 +2,24 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import 
io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.rest.util.MalformedArgException; import java.util.List; -@ArraySchema(schema = @Schema(implementation = DatasetArg.class)) +@ArraySchema(arraySchema = @Schema(description = DatasetArrayArg.ARRAY_SCHEMA_DESCRIPTION), schema = @Schema(implementation = DatasetArg.class), minItems = 1) public class DatasetArrayArg extends AbstractEntityArrayArg { - private static final String ERROR_MSG_DETAIL = "Provide a string that contains at least one ID or short name, or multiple, separated by (',') character. All identifiers must be same type, i.e. do not combine IDs and short names."; - private static final String ERROR_MSG = AbstractArrayArg.ERROR_MSG + " Dataset identifiers"; + + public static final String OF_WHAT = "dataset IDs or short names"; + public static final String ARRAY_SCHEMA_DESCRIPTION = ARRAY_SCHEMA_DESCRIPTION_PREFIX + OF_WHAT + ". " + ARRAY_SCHEMA_COMPRESSION_DESCRIPTION; private DatasetArrayArg( List values ) { super( DatasetArg.class, values ); } - /** - * Used by RS to parse value of request parameters. - * - * @param s the request arrayDataset argument - * @return an instance of ArrayDatasetArg representing an array of Dataset identifiers from the input string, or a - * malformed ArrayDatasetArg that will throw an {@link javax.ws.rs.BadRequestException} when accessing its value, if - * the input String can not be converted into an array of Dataset identifiers. 
- */ - @SuppressWarnings("unused") public static DatasetArrayArg valueOf( final String s ) throws MalformedArgException { - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( DatasetArrayArg.ERROR_MSG, s ), - new IllegalArgumentException( DatasetArrayArg.ERROR_MSG_DETAIL ) ); - } - return new DatasetArrayArg( splitAndTrim( s ) ); + return valueOf( s, OF_WHAT, DatasetArrayArg::new, true ); } - } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/ExcludeArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/ExcludeArg.java index 7b10d87901..bd8fd6732e 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/ExcludeArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/ExcludeArg.java @@ -2,8 +2,6 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; -import ubic.gemma.rest.util.MalformedArgException; import java.util.List; @@ -18,10 +16,6 @@ private ExcludeArg( List values ) { } public static ExcludeArg valueOf( String s ) { - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( ERROR_MSG, s ), new IllegalArgumentException( - "Provide a string that contains at least one character, or several strings separated by a comma (',') character." 
) ); - } - return new ExcludeArg<>( splitAndTrim( s ) ); + return valueOf( s, "excluded fields", ExcludeArg::new, false ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/FactorValueArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/FactorValueArrayArg.java deleted file mode 100644 index a3b5e14c85..0000000000 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/FactorValueArrayArg.java +++ /dev/null @@ -1,21 +0,0 @@ -package ubic.gemma.rest.util.args; - -import io.swagger.v3.oas.annotations.media.ArraySchema; -import io.swagger.v3.oas.annotations.media.Schema; -import ubic.gemma.model.expression.experiment.FactorValue; -import ubic.gemma.persistence.service.expression.experiment.FactorValueService; - -import java.util.List; - -@ArraySchema(schema = @Schema(implementation = FactorValueArg.class)) -public class FactorValueArrayArg extends AbstractEntityArrayArg { - - public FactorValueArrayArg( List values ) { - super( FactorValueArg.class, values ); - } - - public static FactorValueArrayArg valueOf( String s ) { - return new FactorValueArrayArg( splitAndTrim( s ) ); - } - -} diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/FilterArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/FilterArg.java index 4ad55c6267..fdbef84dff 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/FilterArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/FilterArg.java @@ -246,7 +246,13 @@ private Filter( FilterArgParser.FilterContext filterContext ) { public static FilterArg valueOf( String s ) { LoggingErrorListener lel = new LoggingErrorListener(); - FilterArgLexer lexer = new FilterArgLexer( CharStreams.fromString( decodeCompressedArg( s ) ) ) { + try { + s = decodeCompressedArg( s ); + } catch ( IllegalArgumentException e ) { + throw new MalformedArgException( e ); + } + + FilterArgLexer lexer = new FilterArgLexer( CharStreams.fromString( s ) ) { @Override public void recover( 
RecognitionException re ) { throw new ParseCancellationException( re ); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/GeneArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/GeneArrayArg.java index b3a24a520b..052ee054fd 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/GeneArrayArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/GeneArrayArg.java @@ -2,37 +2,22 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; -import ubic.gemma.persistence.service.genome.gene.GeneService; import ubic.gemma.model.genome.Gene; -import ubic.gemma.rest.util.MalformedArgException; +import ubic.gemma.persistence.service.genome.gene.GeneService; import java.util.List; -@ArraySchema(schema = @Schema(implementation = GeneArg.class)) +@ArraySchema(arraySchema = @Schema(description = GeneArrayArg.ARRAY_SCHEMA_DESCRIPTION), schema = @Schema(implementation = GeneArg.class), minItems = 1) public class GeneArrayArg extends AbstractEntityArrayArg { - private static final String ERROR_MSG_DETAIL = "Provide a string that contains at least one Ncbi ID, Ensembl ID or official symbol, or multiple, separated by (',') character. All identifiers must be same type, i.e. do not combine Ensembl and Ncbi IDs."; - private static final String ERROR_MSG = AbstractArrayArg.ERROR_MSG + " Gene identifiers"; + + public static final String OF_WHAT = "NCBI IDs, Ensembl IDs or gene symbols"; + public static final String ARRAY_SCHEMA_DESCRIPTION = ARRAY_SCHEMA_DESCRIPTION_PREFIX + OF_WHAT + ". " + ARRAY_SCHEMA_COMPRESSION_DESCRIPTION; private GeneArrayArg( List values ) { super( GeneArg.class, values ); } - /** - * Used by RS to parse value of request parameters. 
- * - * @param s the request arrayGene argument - * @return an instance of ArrayGeneArg representing an array of Gene identifiers from the input string, or a - * malformed ArrayGeneArg that will throw an {@link javax.ws.rs.BadRequestException} when accessing its value, if - * the input String can not be converted into an array of Gene identifiers. - */ - @SuppressWarnings("unused") public static GeneArrayArg valueOf( final String s ) { - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( GeneArrayArg.ERROR_MSG, s ), - new IllegalArgumentException( GeneArrayArg.ERROR_MSG_DETAIL ) ); - } - return new GeneArrayArg( splitAndTrim( s ) ); + return valueOf( s, OF_WHAT, GeneArrayArg::new, true ); } - } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/PlatformArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/PlatformArrayArg.java index a482607662..48da0db20e 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/PlatformArrayArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/PlatformArrayArg.java @@ -2,37 +2,23 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.rest.util.MalformedArgException; import java.util.List; -@ArraySchema(schema = @Schema(implementation = PlatformArg.class)) +@ArraySchema(arraySchema = @Schema(description = PlatformArrayArg.ARRAY_SCHEMA_DESCRIPTION), schema = @Schema(implementation = PlatformArg.class), minItems = 1) public class PlatformArrayArg extends AbstractEntityArrayArg { - private static final String ERROR_MSG_DETAIL = "Provide a string that contains at least one ID or short name, or multiple, separated by (',') character. All identifiers must be same type, i.e. 
do not combine IDs and short names."; - private static final String ERROR_MSG = AbstractArrayArg.ERROR_MSG + " Platform identifiers"; + + public static final String OF_WHAT = "platform IDs or short names"; + public static final String ARRAY_SCHEMA_DESCRIPTION = ARRAY_SCHEMA_DESCRIPTION_PREFIX + OF_WHAT + ". " + ARRAY_SCHEMA_COMPRESSION_DESCRIPTION; private PlatformArrayArg( List values ) { super( PlatformArg.class, values ); } - /** - * Used by RS to parse value of request parameters. - * - * @param s the request arrayPlatform argument - * @return an instance of ArrayPlatformArg representing an array of Platform identifiers from the input string, or a - * malformed ArrayPlatformArg that will throw an {@link javax.ws.rs.BadRequestException} when accessing its value, - * if the input String can not be converted into an array of Platform identifiers. - */ - @SuppressWarnings("unused") public static PlatformArrayArg valueOf( final String s ) throws MalformedArgException { - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( PlatformArrayArg.ERROR_MSG, s ), - new IllegalArgumentException( PlatformArrayArg.ERROR_MSG_DETAIL ) ); - } - return new PlatformArrayArg( splitAndTrim( s ) ); + return valueOf( s, OF_WHAT, PlatformArrayArg::new, true ); } - } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/StringArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/StringArrayArg.java index 35bee7850c..f0dc5851c4 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/StringArrayArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/StringArrayArg.java @@ -2,41 +2,25 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; -import ubic.gemma.rest.util.MalformedArgException; import java.util.List; -import static ubic.gemma.rest.util.ArgUtils.decodeCompressedArg; - /** - * Class representing an API argument 
that should be an array of Strings. + * Class representing an API argument that should be an array of strings. * * @author tesarst */ -@ArraySchema(schema = @Schema(implementation = String.class), - arraySchema = @Schema(description = "A comma-delimited list of strings. The value may be compressed with gzip and encoded with base64.")) +@ArraySchema(arraySchema = @Schema(description = StringArrayArg.ARRAY_SCHEMA_DESCRIPTION), schema = @Schema(implementation = String.class), minItems = 1) public class StringArrayArg extends AbstractArrayArg { - private static final String ERROR_MSG = AbstractArrayArg.ERROR_MSG + " Strings"; + + public static final String OF_WHAT = "strings"; + public static final String ARRAY_SCHEMA_DESCRIPTION = ARRAY_SCHEMA_DESCRIPTION_PREFIX + OF_WHAT + ". " + ARRAY_SCHEMA_COMPRESSION_DESCRIPTION; private StringArrayArg( List values ) { super( values ); } - /** - * Used by RS to parse value of request parameters. - * - * @param s the request arrayString argument - * @return an instance of ArrayStringArg representing array of strings from the input string, or a malformed - * ArrayStringArg that will throw an {@link javax.ws.rs.BadRequestException} when accessing its value, if the input - * String can not be converted into an array of strings. - */ public static StringArrayArg valueOf( String s ) { - s = decodeCompressedArg( s ); - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( ERROR_MSG, s ), new IllegalArgumentException( - "Provide a string that contains at least one character, or several strings separated by a comma (',') character." 
) ); - } - return new StringArrayArg( splitAndTrim( s ) ); + return valueOf( s, "strings", StringArrayArg::new, true ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/TaxonArrayArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/TaxonArrayArg.java index e9f015397b..f71e84ef30 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/TaxonArrayArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/TaxonArrayArg.java @@ -2,38 +2,22 @@ import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Schema; -import org.apache.commons.lang3.StringUtils; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.genome.taxon.TaxonService; -import ubic.gemma.rest.util.MalformedArgException; import java.util.List; -@ArraySchema(schema = @Schema(implementation = TaxonArg.class)) +@ArraySchema(arraySchema = @Schema(description = TaxonArrayArg.ARRAY_SCHEMA_DESCRIPTION), schema = @Schema(implementation = TaxonArg.class), minItems = 1) public class TaxonArrayArg extends AbstractEntityArrayArg { - private static final String ERROR_MSG_DETAIL = "Provide a string that contains at least one " - + "ID, NCBI ID, scientific name or common name or multiple, separated by (',') character. " - + "All identifiers must be same type, i.e. do not combine different kinds of IDs and string identifiers."; - private static final String ERROR_MSG = AbstractArrayArg.ERROR_MSG + " Taxon identifiers"; + + public static final String OF_WHAT = "taxon IDs, NCBI IDs, common names or scientific names"; + public static final String ARRAY_SCHEMA_DESCRIPTION = ARRAY_SCHEMA_DESCRIPTION_PREFIX + OF_WHAT + "."; private TaxonArrayArg( List values ) { super( TaxonArg.class, values ); } - /** - * Used by RS to parse value of request parameters. 
- * - * @param s the request arrayTaxon argument - * @return an instance of ArrayTaxonArg representing an array of Taxon identifiers from the input string, or a - * malformed ArrayTaxonArg that will throw an {@link javax.ws.rs.BadRequestException} when accessing its value, if - * the input String can not be converted into an array of Taxon identifiers. - */ - @SuppressWarnings("unused") public static TaxonArrayArg valueOf( final String s ) { - if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( String.format( TaxonArrayArg.ERROR_MSG, s ), - new IllegalArgumentException( TaxonArrayArg.ERROR_MSG_DETAIL ) ); - } - return new TaxonArrayArg( splitAndTrim( s ) ); + return valueOf( s, OF_WHAT, TaxonArrayArg::new, false ); } } From f393772ce84878cdd4f9211352dcc6c3e5b4c83d Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 6 May 2024 12:51:37 -0700 Subject: [PATCH 20/81] Retrieve DE results per gene (fix #1111) --- .../DifferentialExpressionAnalysisResult.java | 1 - ...alExpressionAnalysisResultValueObject.java | 16 +++-- .../DifferentialExpressionAnalysisDao.java | 14 +++++ ...DifferentialExpressionAnalysisDaoImpl.java | 63 +++++++++++++++++-- ...DifferentialExpressionAnalysisService.java | 12 ++++ ...erentialExpressionAnalysisServiceImpl.java | 17 +++-- .../ubic/gemma/rest/DatasetsWebService.java | 46 ++++++++++++++ 7 files changed, 156 insertions(+), 13 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResult.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResult.java index 76b3d0bd2b..81029037ef 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResult.java +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResult.java @@ -22,7 +22,6 @@ import 
ubic.gemma.model.expression.designElement.CompositeSequence; import java.io.Serializable; -import java.util.Collection; import java.util.HashSet; import java.util.Set; diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java index b7c273719f..4fd9ec7339 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java @@ -14,6 +14,7 @@ */ package ubic.gemma.model.analysis.expression.diff; +import com.fasterxml.jackson.annotation.JsonInclude; import lombok.Data; import lombok.EqualsAndHashCode; import org.hibernate.Hibernate; @@ -33,6 +34,7 @@ public class DifferentialExpressionAnalysisResultValueObject extends AnalysisRes private Long probeId; private String probeName; + @JsonInclude(JsonInclude.Include.NON_NULL) private List genes; private Double pValue; private Double correctedPvalue; @@ -43,11 +45,12 @@ public DifferentialExpressionAnalysisResultValueObject() { super(); } - public DifferentialExpressionAnalysisResultValueObject( DifferentialExpressionAnalysisResult result, List genes ) { + public DifferentialExpressionAnalysisResultValueObject( DifferentialExpressionAnalysisResult result ) { super( result ); - this.probeId = result.getProbe().getId(); - this.probeName = result.getProbe().getName(); - this.genes = genes.stream().map( GeneValueObject::new ).collect( Collectors.toList() ); + if ( Hibernate.isInitialized( result.getProbe() ) ) { + this.probeId = result.getProbe().getId(); + this.probeName = result.getProbe().getName(); + } this.pValue = result.getPvalue(); this.correctedPvalue = result.getCorrectedPvalue(); this.rank = result.getRank(); @@ -59,4 +62,9 @@ public 
DifferentialExpressionAnalysisResultValueObject( DifferentialExpressionAn this.contrasts = null; } } + + public DifferentialExpressionAnalysisResultValueObject( DifferentialExpressionAnalysisResult result, List genes ) { + this( result ); + this.genes = genes.stream().map( GeneValueObject::new ).collect( Collectors.toList() ); + } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java index 97935314af..44017cb076 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java @@ -19,6 +19,7 @@ package ubic.gemma.persistence.service.analysis.expression.diff; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; +import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.expression.experiment.BioAssaySet; @@ -28,6 +29,7 @@ import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisDao; +import javax.annotation.Nullable; import java.util.Collection; import java.util.List; import java.util.Map; @@ -69,6 +71,18 @@ Collection find( Gene gene, ExpressionAnalysisRe Collection findExperimentsWithAnalyses( Gene gene ); + /** + * Retrieve differential expression results for a given gene across all the given datasets. + *

+ * If experiment IDs are provided, analysis of their subsets will also be included and the results will be attached + * to the corresponding source experiment in the {@code result2ExperimentId} mapping. + * + * @param gene a specific gene to retrieve differential expression for + * @param experimentIds list of IDs of experiments to consider, or all experiments if null + * @param result2ExperimentId mapping of result to experiment ID they belong, ignored if null + */ + List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId ); + Map> getAnalyses( Collection expressionExperiments ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java index 9c8463ce65..2084fcbc30 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java @@ -21,10 +21,7 @@ import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.lang3.tuple.Pair; -import org.hibernate.Hibernate; -import org.hibernate.HibernateException; -import org.hibernate.Session; -import org.hibernate.SessionFactory; +import org.hibernate.*; import org.hibernate.engine.jdbc.spi.SqlStatementLogger; import org.hibernate.engine.spi.SessionImplementor; import org.hibernate.id.IdentifierGeneratorHelper; @@ -46,6 +43,7 @@ import ubic.gemma.persistence.util.CommonQueries; import ubic.gemma.persistence.util.EntityUtils; +import javax.annotation.Nullable; import java.io.Serializable; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -662,6 +660,63 @@ public Map> getAnalysesByE } + @Override + public 
List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId ) { + if ( experimentIds != null && experimentIds.isEmpty() ) { + return Collections.emptyList(); + } + //noinspection unchecked + List probeIds = getSessionFactory().getCurrentSession() + .createSQLQuery( "select CS from GENE2CS where GENE = :geneId" ) + .addScalar( "CS", StandardBasicTypes.LONG ) + .setParameter( "geneId", gene.getId() ) + .list(); + if ( probeIds.isEmpty() ) { + log.warn( String.format( "%s has no associated probes in the GENE2CS table, no differential expression results will be returned.", gene ) ); + return Collections.emptyList(); + } + Set bioAssaySetIds = null; + Map subsetIdToExperimentId = null; + if ( experimentIds != null ) { + subsetIdToExperimentId = streamByBatch( getSessionFactory().getCurrentSession() + .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess" + + " where eess.sourceExperiment.id in :eeIds" ), "eeIds", experimentIds, 2048, Object[].class ) + .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); + bioAssaySetIds = new HashSet<>( experimentIds ); + bioAssaySetIds.addAll( subsetIdToExperimentId.keySet() ); + } else if ( result2ExperimentId != null ) { + // fetch all subset -> source mappings + //noinspection unchecked + List r2 = getSessionFactory().getCurrentSession() + .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess where eess.sourceExperiment in :eeIds" ) + .list(); + subsetIdToExperimentId = r2.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); + } + Query query = getSessionFactory().getCurrentSession() + .createQuery( "select dear, dea.experimentAnalyzed.id from DifferentialExpressionAnalysis dea " + + "join dea.resultSets dears " + + "join dears.results dear " + + "where dear.probe.id in :probeIds" + + ( experimentIds != null ? 
" and dea.experimentAnalyzed.id in :bioAssaySetIds" : "" ) ) + .setParameterList( "probeIds", probeIds ); + List result; + if ( bioAssaySetIds != null ) { + result = listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048 ); + } else { + //noinspection unchecked + result = query.list(); + } + List rs = new ArrayList<>( result.size() ); + for ( Object[] row : result ) { + DifferentialExpressionAnalysisResult r = ( DifferentialExpressionAnalysisResult ) row[0]; + rs.add( r ); + if ( result2ExperimentId != null ) { + result2ExperimentId.put( r, subsetIdToExperimentId.getOrDefault( ( Long ) row[1], ( Long ) row[1] ) ); + } + } + return rs; + } + @Override public void remove( DifferentialExpressionAnalysis analysis ) { log.info( "Removing " + analysis + "..." ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java index 70f01865e5..1e81654b48 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java @@ -20,17 +20,20 @@ import org.springframework.security.access.annotation.Secured; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; +import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import 
ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; +import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.BaseService; import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisService; import javax.annotation.CheckReturnValue; +import javax.annotation.Nullable; import java.util.Collection; import java.util.List; import java.util.Map; @@ -84,6 +87,15 @@ Collection find( ubic.gemma.model.genome.Gene ge @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_COLLECTION_READ" }) Collection findExperimentsWithAnalyses( ubic.gemma.model.genome.Gene gene ); + /** + * Retrieve differential expression results for a given gene. + *

+ * Probes and contrasts are initialized. + * @see DifferentialExpressionAnalysisDao#findResultsByGene(Gene, Collection, Map) + */ + @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) + List findResultsByGene( Gene gene, @Nullable Collection experimentIds, Map result2ExperimentId ); + @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ", "AFTER_ACL_COLLECTION_READ" }) Collection getAnalyses( BioAssaySet expressionExperiment ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java index 693ccb2810..e5c5817813 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java @@ -26,10 +26,7 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import ubic.gemma.core.tasks.analysis.diffex.DifferentialExpressionAnalysisTask; -import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; -import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; -import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; -import ubic.gemma.model.analysis.expression.diff.GeneDifferentialExpressionMetaAnalysis; +import ubic.gemma.model.analysis.expression.diff.*; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Gene; @@ -38,6 +35,7 @@ import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentDao; import ubic.gemma.persistence.util.EntityUtils; +import javax.annotation.Nullable; import java.util.*; /** @@ -117,6 +115,17 @@ public 
Collection findExperimentsWithAnalyses( Gene gene ) { return this.differentialExpressionAnalysisDao.findExperimentsWithAnalyses( gene ); } + @Override + @Transactional(readOnly = true) + public List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId ) { + List results = differentialExpressionAnalysisDao.findResultsByGene( gene, experimentIds, result2ExperimentId ); + for ( DifferentialExpressionAnalysisResult result : results ) { + Hibernate.initialize( result.getProbe() ); + Hibernate.initialize( result.getContrasts() ); + } + return results; + } + @Override @Transactional(readOnly = true) public Collection getAnalyses( BioAssaySet expressionExperiment ) { diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index a2a972df45..64f12598ae 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -22,6 +22,7 @@ import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.responses.ApiResponse; import io.swagger.v3.oas.annotations.security.SecurityRequirement; +import lombok.Data; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Value; @@ -44,6 +45,8 @@ import ubic.gemma.core.search.DefaultHighlighter; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.lucene.SimpleMarkdownFormatter; +import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; +import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResultValueObject; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; @@ -62,6 +65,7 @@ import 
ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; +import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; @@ -1092,6 +1096,48 @@ public ResponseDataObject> getDatase ); } + /** + * Obtain differential expression results for a given gene. + */ + @GET + @Path("/differential/genes/{gene}") + @Produces(MediaType.APPLICATION_JSON) + @Operation(summary = "Retrieve the differential expression results for a given gene") + public ResponseDataObject> getDatasetsDifferentialExpressionForGene( + @PathParam("gene") GeneArg geneArg, + @QueryParam("query") QueryArg query, + @QueryParam("filter") @DefaultValue("") FilterArg filter + ) { + Gene gene = geneArgService.getEntity( geneArg ); + Set ids = new HashSet<>( expressionExperimentService.loadIdsWithCache( datasetArgService.getFilters( filter ), null ) ); + if ( query != null ) { + ids.retainAll( datasetArgService.getIdsForSearchQuery( query ) ); + } + if ( ids.isEmpty() ) { + respond( Collections.emptyList() ); + } + Map datasetByResult = new HashMap<>(); + List rs = differentialExpressionAnalysisService.findResultsByGene( gene, ids, datasetByResult ).stream() + .map( r -> new DifferentialExpressionAnalysisResultWithDatasetIdValueObject( r, datasetByResult.get( r ) ) ) + .collect( Collectors.toList() ); + // TODO: pick one result per gene and dataset + return respond( rs ); + } + + @Data + @EqualsAndHashCode(callSuper = true) + public static class DifferentialExpressionAnalysisResultWithDatasetIdValueObject extends DifferentialExpressionAnalysisResultValueObject { + + private Long datasetId; + private Long resultSetId; + + public 
DifferentialExpressionAnalysisResultWithDatasetIdValueObject( DifferentialExpressionAnalysisResult result, Long datasetId ) { + super( result ); + this.datasetId = datasetId; + this.resultSetId = result.getResultSet().getId(); + } + } + /** * Retrieve a "refreshed" dataset. *

From 6f5ed69c1b22b17a712e05fc739cad981cabc828 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 6 May 2024 13:05:43 -0700 Subject: [PATCH 21/81] Pick best result by dataset Make initialization of probe and contrasts optional. Adjust the VO to omit the corresponding fields when uninitialized. --- ...alExpressionAnalysisResultValueObject.java | 3 ++ ...DifferentialExpressionAnalysisDaoImpl.java | 14 +++--- ...DifferentialExpressionAnalysisService.java | 8 ++-- ...erentialExpressionAnalysisServiceImpl.java | 43 +++++++++++++++++-- .../ubic/gemma/rest/DatasetsWebService.java | 8 ++-- 5 files changed, 58 insertions(+), 18 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java index 4fd9ec7339..509c186cc9 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/DifferentialExpressionAnalysisResultValueObject.java @@ -32,13 +32,16 @@ @EqualsAndHashCode(of = { "probeId" }, callSuper = true) public class DifferentialExpressionAnalysisResultValueObject extends AnalysisResultValueObject { + @JsonInclude(JsonInclude.Include.NON_NULL) private Long probeId; + @JsonInclude(JsonInclude.Include.NON_NULL) private String probeName; @JsonInclude(JsonInclude.Include.NON_NULL) private List genes; private Double pValue; private Double correctedPvalue; private Double rank; + @JsonInclude(JsonInclude.Include.NON_NULL) private List contrasts; public DifferentialExpressionAnalysisResultValueObject() { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java index 2084fcbc30..fb5f2f4384 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java @@ -688,19 +688,21 @@ public List findResultsByGene( Gene gene, // fetch all subset -> source mappings //noinspection unchecked List r2 = getSessionFactory().getCurrentSession() - .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess where eess.sourceExperiment in :eeIds" ) + .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess" ) .list(); subsetIdToExperimentId = r2.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); } Query query = getSessionFactory().getCurrentSession() - .createQuery( "select dear, dea.experimentAnalyzed.id from DifferentialExpressionAnalysis dea " - + "join dea.resultSets dears " - + "join dears.results dear " + .createQuery( "select dear, e.id from DifferentialExpressionAnalysisResult dear " + + "join dear.resultSet dears " + + "join dears.analysis dea " + + "join dea.experimentAnalyzed e " + "where dear.probe.id in :probeIds" - + ( experimentIds != null ? " and dea.experimentAnalyzed.id in :bioAssaySetIds" : "" ) ) - .setParameterList( "probeIds", probeIds ); + + ( bioAssaySetIds != null ? 
" and e.id in :bioAssaySetIds" : "" ) ) + .setParameterList( "probeIds", optimizeParameterList( probeIds ) ); List result; if ( bioAssaySetIds != null ) { + // this batch size has been optimized on the *fetch everything* worst case scenario result = listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048 ); } else { //noinspection unchecked diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java index 1e81654b48..b403da4fc4 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java @@ -89,12 +89,14 @@ Collection find( ubic.gemma.model.genome.Gene ge /** * Retrieve differential expression results for a given gene. - *

- * Probes and contrasts are initialized. + * @param initializeProbes if true, initialize probes + * @param initializeContrasts if true, initialize contrasts + * @param pickBestByDataset pick the best analysis result by dataset (i.e. the one with the lowest P-value), + * otherwise all results are returned. * @see DifferentialExpressionAnalysisDao#findResultsByGene(Gene, Collection, Map) */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) - List findResultsByGene( Gene gene, @Nullable Collection experimentIds, Map result2ExperimentId ); + List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId, boolean initializeProbes, boolean initializeContrasts, boolean pickBestByDataset ); @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ", "AFTER_ACL_COLLECTION_READ" }) Collection getAnalyses( BioAssaySet expressionExperiment ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java index e5c5817813..0157f605b6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java @@ -37,6 +37,7 @@ import javax.annotation.Nullable; import java.util.*; +import java.util.stream.Collectors; /** * @author paul @@ -117,15 +118,49 @@ public Collection findExperimentsWithAnalyses( Gene gene ) { @Override @Transactional(readOnly = true) - public List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId ) { + public List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId, boolean initializeProbes, boolean 
initializeContrasts, boolean pickBestByDataset ) { + StopWatch timer = StopWatch.createStarted(); + if ( pickBestByDataset && result2ExperimentId == null ) { + result2ExperimentId = new HashMap<>(); + } List results = differentialExpressionAnalysisDao.findResultsByGene( gene, experimentIds, result2ExperimentId ); - for ( DifferentialExpressionAnalysisResult result : results ) { - Hibernate.initialize( result.getProbe() ); - Hibernate.initialize( result.getContrasts() ); + long queryingMs = timer.getTime(); + if ( pickBestByDataset ) { + results = pickBestByDataset( results, result2ExperimentId ); + } + long probesMs = timer.getTime(); + if ( initializeProbes ) { + for ( DifferentialExpressionAnalysisResult result : results ) { + Hibernate.initialize( result.getProbe() ); + } + } + probesMs = timer.getTime() - probesMs; + long contrastsMs = timer.getTime(); + if ( initializeContrasts ) { + for ( DifferentialExpressionAnalysisResult result : results ) { + Hibernate.initialize( result.getContrasts() ); + } + } + contrastsMs = timer.getTime() - contrastsMs; + if ( timer.getTime() > 1000 ) { + log.warn( String.format( "Retrieving %d diffex results for %s took %d ms (querying results: %d ms, initializing probes: %d ms, initializing contrasts: %d ms)", + results.size(), gene, timer.getTime(), queryingMs, probesMs, contrastsMs ) ); } return results; } + /** + * Pick the best analysis result per dataset. 
+ */ + private List pickBestByDataset( List rs, Map result2ExperimentIdMap ) { + Map bestByDataset = rs.stream() + .collect( Collectors.groupingBy( + result2ExperimentIdMap::get, + Collectors.collectingAndThen( Collectors.toList(), ( List l ) -> + l.stream().min( Comparator.comparing( DifferentialExpressionAnalysisResult::getPvalue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ).orElseThrow( IllegalStateException::new ) ) ) ); + return new ArrayList<>( bestByDataset.values() ); + } + @Override @Transactional(readOnly = true) public Collection getAnalyses( BioAssaySet expressionExperiment ) { diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 64f12598ae..8498d13bd3 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -1114,14 +1114,12 @@ public ResponseDataObject datasetByResult = new HashMap<>(); - List rs = differentialExpressionAnalysisService.findResultsByGene( gene, ids, datasetByResult ).stream() + return respond( differentialExpressionAnalysisService.findResultsByGene( gene, ids, datasetByResult, false, true, true ).stream() .map( r -> new DifferentialExpressionAnalysisResultWithDatasetIdValueObject( r, datasetByResult.get( r ) ) ) - .collect( Collectors.toList() ); - // TODO: pick one result per gene and dataset - return respond( rs ); + .collect( Collectors.toList() ) ); } @Data From b2abf7f99d4a00a64a98d47fb758e0d927acf1cb Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 10:02:07 -0700 Subject: [PATCH 22/81] Relocate findResultsByGene() DifferentialExpressionResultService and DAO Initialize contrasts in the query and make some simplifications. 
--- .../DifferentialExpressionAnalysisDao.java | 14 --- ...DifferentialExpressionAnalysisDaoImpl.java | 65 +------------- ...DifferentialExpressionAnalysisService.java | 14 --- ...erentialExpressionAnalysisServiceImpl.java | 45 ---------- .../diff/DifferentialExpressionResultDao.java | 11 +++ .../DifferentialExpressionResultDaoImpl.java | 58 +++++++++++++ .../DifferentialExpressionResultService.java | 16 ++-- ...fferentialExpressionResultServiceImpl.java | 25 +++++- .../DifferentialExpressionResultDaoTest.java | 28 +++++- .../ubic/gemma/rest/DatasetsWebService.java | 85 ++++++++++--------- .../java/ubic/gemma/rest/util/Responders.java | 4 + .../gemma/rest/DatasetsWebServiceTest.java | 27 ++++-- 12 files changed, 200 insertions(+), 192 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java index 44017cb076..97935314af 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDao.java @@ -19,7 +19,6 @@ package ubic.gemma.persistence.service.analysis.expression.diff; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; -import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.expression.experiment.BioAssaySet; @@ -29,7 +28,6 @@ import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisDao; -import javax.annotation.Nullable; import java.util.Collection; import 
java.util.List; import java.util.Map; @@ -71,18 +69,6 @@ Collection find( Gene gene, ExpressionAnalysisRe Collection findExperimentsWithAnalyses( Gene gene ); - /** - * Retrieve differential expression results for a given gene across all the given datasets. - *

- * If experiment IDs are provided, analysis of their subsets will also be included and the results will be attached - * to the corresponding source experiment in the {@code result2ExperimentId} mapping. - * - * @param gene a specific gene to retrieve differential expression for - * @param experimentIds list of IDs of experiments to consider, or all experiments if null - * @param result2ExperimentId mapping of result to experiment ID they belong, ignored if null - */ - List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId ); - Map> getAnalyses( Collection expressionExperiments ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java index fb5f2f4384..9c8463ce65 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java @@ -21,7 +21,10 @@ import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.lang3.tuple.Pair; -import org.hibernate.*; +import org.hibernate.Hibernate; +import org.hibernate.HibernateException; +import org.hibernate.Session; +import org.hibernate.SessionFactory; import org.hibernate.engine.jdbc.spi.SqlStatementLogger; import org.hibernate.engine.spi.SessionImplementor; import org.hibernate.id.IdentifierGeneratorHelper; @@ -43,7 +46,6 @@ import ubic.gemma.persistence.util.CommonQueries; import ubic.gemma.persistence.util.EntityUtils; -import javax.annotation.Nullable; import java.io.Serializable; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -660,65 +662,6 @@ public Map> getAnalysesByE } - @Override - public 
List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId ) { - if ( experimentIds != null && experimentIds.isEmpty() ) { - return Collections.emptyList(); - } - //noinspection unchecked - List probeIds = getSessionFactory().getCurrentSession() - .createSQLQuery( "select CS from GENE2CS where GENE = :geneId" ) - .addScalar( "CS", StandardBasicTypes.LONG ) - .setParameter( "geneId", gene.getId() ) - .list(); - if ( probeIds.isEmpty() ) { - log.warn( String.format( "%s has no associated probes in the GENE2CS table, no differential expression results will be returned.", gene ) ); - return Collections.emptyList(); - } - Set bioAssaySetIds = null; - Map subsetIdToExperimentId = null; - if ( experimentIds != null ) { - subsetIdToExperimentId = streamByBatch( getSessionFactory().getCurrentSession() - .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess" - + " where eess.sourceExperiment.id in :eeIds" ), "eeIds", experimentIds, 2048, Object[].class ) - .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); - bioAssaySetIds = new HashSet<>( experimentIds ); - bioAssaySetIds.addAll( subsetIdToExperimentId.keySet() ); - } else if ( result2ExperimentId != null ) { - // fetch all subset -> source mappings - //noinspection unchecked - List r2 = getSessionFactory().getCurrentSession() - .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess" ) - .list(); - subsetIdToExperimentId = r2.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); - } - Query query = getSessionFactory().getCurrentSession() - .createQuery( "select dear, e.id from DifferentialExpressionAnalysisResult dear " - + "join dear.resultSet dears " - + "join dears.analysis dea " - + "join dea.experimentAnalyzed e " - + "where dear.probe.id in :probeIds" - + ( bioAssaySetIds != null ? 
" and e.id in :bioAssaySetIds" : "" ) ) - .setParameterList( "probeIds", optimizeParameterList( probeIds ) ); - List result; - if ( bioAssaySetIds != null ) { - // this batch size has been optimized on the *fetch everything* worst case scenario - result = listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048 ); - } else { - //noinspection unchecked - result = query.list(); - } - List rs = new ArrayList<>( result.size() ); - for ( Object[] row : result ) { - DifferentialExpressionAnalysisResult r = ( DifferentialExpressionAnalysisResult ) row[0]; - rs.add( r ); - if ( result2ExperimentId != null ) { - result2ExperimentId.put( r, subsetIdToExperimentId.getOrDefault( ( Long ) row[1], ( Long ) row[1] ) ); - } - } - return rs; - } - @Override public void remove( DifferentialExpressionAnalysis analysis ) { log.info( "Removing " + analysis + "..." ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java index b403da4fc4..70f01865e5 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisService.java @@ -20,20 +20,17 @@ import org.springframework.security.access.annotation.Secured; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; -import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.expression.experiment.BioAssaySet; import ubic.gemma.model.expression.experiment.ExperimentalFactor; import 
ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; -import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.BaseService; import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisService; import javax.annotation.CheckReturnValue; -import javax.annotation.Nullable; import java.util.Collection; import java.util.List; import java.util.Map; @@ -87,17 +84,6 @@ Collection find( ubic.gemma.model.genome.Gene ge @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_COLLECTION_READ" }) Collection findExperimentsWithAnalyses( ubic.gemma.model.genome.Gene gene ); - /** - * Retrieve differential expression results for a given gene. - * @param initializeProbes if true, initialize probes - * @param initializeContrasts if true, initialize contrasts - * @param pickBestByDataset pick the best analysis result by dataset (i.e. the one with the lowest P-value), - * otherwise all results are returned. 
- * @see DifferentialExpressionAnalysisDao#findResultsByGene(Gene, Collection, Map) - */ - @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) - List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId, boolean initializeProbes, boolean initializeContrasts, boolean pickBestByDataset ); - @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ", "AFTER_ACL_COLLECTION_READ" }) Collection getAnalyses( BioAssaySet expressionExperiment ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java index 0157f605b6..ab4dda750d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java @@ -116,51 +116,6 @@ public Collection findExperimentsWithAnalyses( Gene gene ) { return this.differentialExpressionAnalysisDao.findExperimentsWithAnalyses( gene ); } - @Override - @Transactional(readOnly = true) - public List findResultsByGene( Gene gene, @Nullable Collection experimentIds, @Nullable Map result2ExperimentId, boolean initializeProbes, boolean initializeContrasts, boolean pickBestByDataset ) { - StopWatch timer = StopWatch.createStarted(); - if ( pickBestByDataset && result2ExperimentId == null ) { - result2ExperimentId = new HashMap<>(); - } - List results = differentialExpressionAnalysisDao.findResultsByGene( gene, experimentIds, result2ExperimentId ); - long queryingMs = timer.getTime(); - if ( pickBestByDataset ) { - results = pickBestByDataset( results, result2ExperimentId ); - } - long probesMs = timer.getTime(); - if ( initializeProbes ) { - for ( DifferentialExpressionAnalysisResult result : results ) 
{ - Hibernate.initialize( result.getProbe() ); - } - } - probesMs = timer.getTime() - probesMs; - long contrastsMs = timer.getTime(); - if ( initializeContrasts ) { - for ( DifferentialExpressionAnalysisResult result : results ) { - Hibernate.initialize( result.getContrasts() ); - } - } - contrastsMs = timer.getTime() - contrastsMs; - if ( timer.getTime() > 1000 ) { - log.warn( String.format( "Retrieving %d diffex results for %s took %d ms (querying results: %d ms, initializing probes: %d ms, initializing contrasts: %d ms)", - results.size(), gene, timer.getTime(), queryingMs, probesMs, contrastsMs ) ); - } - return results; - } - - /** - * Pick the best analysis result per dataset. - */ - private List pickBestByDataset( List rs, Map result2ExperimentIdMap ) { - Map bestByDataset = rs.stream() - .collect( Collectors.groupingBy( - result2ExperimentIdMap::get, - Collectors.collectingAndThen( Collectors.toList(), ( List l ) -> - l.stream().min( Comparator.comparing( DifferentialExpressionAnalysisResult::getPvalue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ).orElseThrow( IllegalStateException::new ) ) ) ); - return new ArrayList<>( bestByDataset.values() ); - } - @Override @Transactional(readOnly = true) public Collection getAnalyses( BioAssaySet expressionExperiment ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java index e89cac8dfa..c107796db7 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java @@ -34,6 +34,17 @@ */ public interface DifferentialExpressionResultDao extends BaseDao { + /** + * Retrieve differential expression results for a given gene across 
all the given datasets. + * + * @param gene a specific gene to retrieve differential expression for + * @param experimentAnalyzedIds list of IDs of experiments or experiment subsets to consider + * @param includeSubsets include results from experiment subsets + * @param groupBySourceExperiment if true, results part of a subset are grouped by their source experiment + * @return differential expression results, grouped by experiment ID + */ + Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment ); + /** * Find differential expression for a gene in given data sets, exceeding a given significance level (using the * corrected pvalue field) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index e5549886ca..e67ea5adda 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.hibernate.*; +import org.hibernate.type.StandardBasicTypes; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; import org.springframework.util.Assert; @@ -36,6 +37,7 @@ import ubic.gemma.model.genome.gene.GeneValueObject; import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.util.CommonQueries; +import ubic.gemma.persistence.util.QueryUtils; import java.math.BigInteger; import java.util.*; @@ -84,6 +86,62 @@ public void remove( DifferentialExpressionAnalysisResult entity ) { throw new 
UnsupportedOperationException( "Results cannot be removed directly, use DifferentialExpressionAnalysisDao.remove() instead." ); } + @Override + public Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment ) { + Assert.notNull( gene.getId(), "The gene must have a non-null ID." ); + if ( experimentAnalyzedIds.isEmpty() ) { + return Collections.emptyMap(); + } + //noinspection unchecked + List probeIds = getSessionFactory().getCurrentSession() + .createSQLQuery( "select CS from GENE2CS where GENE = :geneId" ) + .addScalar( "CS", StandardBasicTypes.LONG ) + .setParameter( "geneId", gene.getId() ) + .list(); + if ( probeIds.isEmpty() ) { + log.warn( String.format( "%s has no associated probes in the GENE2CS table, no differential expression results will be returned.", gene ) ); + return Collections.emptyMap(); + } + Set bioAssaySetIds = new HashSet<>( experimentAnalyzedIds ); + Map subsetIdToExperimentId = null; + // create a mapping of subset ID to source experiment ID + if ( groupBySourceExperiment ) { + subsetIdToExperimentId = QueryUtils.streamByBatch( getSessionFactory().getCurrentSession() + .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess" + + " where eess.sourceExperiment.id in :eeIds or eess.id in :eeIds" ), "eeIds", experimentAnalyzedIds, 2048, Object[].class ) + .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); + if ( includeSubsets ) { + bioAssaySetIds.addAll( subsetIdToExperimentId.keySet() ); + } + } else if ( includeSubsets ) { + List subsetIds = QueryUtils.listByBatch( getSessionFactory().getCurrentSession() + .createQuery( "select eess.id from ExpressionExperimentSubSet eess" + + " where eess.sourceExperiment.id in :eeIds or eess.id in :eeIds" ), "eeIds", experimentAnalyzedIds, 2048 ); + bioAssaySetIds.addAll( subsetIds ); + } + Query query = getSessionFactory().getCurrentSession() + .createQuery( 
"select dear, e.id from DifferentialExpressionAnalysisResult dear " + + "join fetch dear.contrasts cr " + + "join dear.resultSet dears " + + "join dears.analysis dea " + + "join dea.experimentAnalyzed e " + + "where dear.probe.id in :probeIds and e.id in :bioAssaySetIds" ) + .setParameterList( "probeIds", optimizeParameterList( probeIds ) ); + List result = QueryUtils.listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048 ); + Map> rs = new HashMap<>(); + for ( Object[] row : result ) { + DifferentialExpressionAnalysisResult r = ( DifferentialExpressionAnalysisResult ) row[0]; + Long key; + if ( groupBySourceExperiment ) { + key = subsetIdToExperimentId.getOrDefault( ( Long ) row[1], ( Long ) row[1] ); + } else { + key = ( Long ) row[1]; + } + rs.computeIfAbsent( key, k -> new ArrayList<>() ).add( r ); + } + return rs; + } + @Override public Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentsAnalyzed, double threshold, int limit ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java index a7122e09cf..79340f4d29 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java @@ -20,17 +20,12 @@ import org.springframework.security.access.annotation.Secured; import ubic.basecode.math.distribution.Histogram; -import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.analysis.expression.diff.*; -import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; 
import ubic.gemma.model.genome.Gene; -import ubic.gemma.persistence.service.BaseImmutableService; import ubic.gemma.persistence.service.BaseReadOnlyService; -import ubic.gemma.persistence.service.BaseService; -import javax.annotation.CheckReturnValue; import java.util.Collection; import java.util.List; import java.util.Map; @@ -43,6 +38,17 @@ @SuppressWarnings("unused") // Possible external use public interface DifferentialExpressionResultService extends BaseReadOnlyService { + /** + * Retrieve the best differential expression results for a given gene. + *

+ * If a source experiment has more than one result for a given gene (i.e. multiple probe for the gene or multiple + * result sets), the best is picked according to its P-value. + * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, boolean) + * @return the best analysis results grouped by source experiment ID + */ + @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) + Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds ); + /** * Given a list of experiments and a threshold value finds all the probes that met the cut off in the given * experiments diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java index bbaa875380..f23637a560 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java @@ -18,19 +18,17 @@ */ package ubic.gemma.persistence.service.analysis.expression.diff; +import org.apache.commons.lang3.time.StopWatch; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import ubic.basecode.math.distribution.Histogram; -import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.analysis.expression.diff.*; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Gene; import ubic.gemma.persistence.service.AbstractService; -import java.util.Collection; -import java.util.List; -import java.util.Map; +import java.util.*; /** * 
@author keshav @@ -48,6 +46,25 @@ public DifferentialExpressionResultServiceImpl( DifferentialExpressionResultDao this.DERDao = DERDao; } + @Override + @Transactional(readOnly = true) + public Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds ) { + StopWatch timer = StopWatch.createStarted(); + Map> resultsBySourceExperiment = DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, true ); + Map bestResults = new HashMap<>(); + for ( Map.Entry> e : resultsBySourceExperiment.entrySet() ) { + DifferentialExpressionAnalysisResult bestResult = e.getValue().stream() + .min( Comparator.comparing( DifferentialExpressionAnalysisResult::getPvalue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) + .orElseThrow( IllegalStateException::new ); + bestResults.put( e.getKey(), bestResult ); + } + if ( timer.getTime() > 1000 ) { + log.warn( String.format( "Retrieving %d diffex results for %s took %d ms", + bestResults.size(), gene, timer.getTime() ) ); + } + return bestResults; + } + @Override @Transactional(readOnly = true) public Map> find( diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java index d815444f86..d2ceeee024 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java @@ -6,10 +6,13 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.util.test.BaseDatabaseTest; import 
ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; +import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.genome.Gene; -import ubic.gemma.core.context.TestComponent; +import ubic.gemma.model.genome.Taxon; import java.util.Collections; @@ -31,6 +34,29 @@ public DifferentialExpressionResultDao differentialExpressionResultDao( SessionF @Autowired private DifferentialExpressionResultDao differentialExpressionResultDao; + @Test + public void testFindByGeneAndExperimentAnalyzedGroupingBySourceExperiment() { + Gene gene = new Gene(); + sessionFactory.getCurrentSession().persist( gene ); + Taxon taxon = new Taxon(); + sessionFactory.getCurrentSession().persist( taxon ); + ArrayDesign ad = new ArrayDesign(); + ad.setPrimaryTaxon( taxon ); + CompositeSequence cs = new CompositeSequence(); + ad.getCompositeSequences().add( cs ); + cs.setArrayDesign( ad ); + sessionFactory.getCurrentSession().persist( ad ); + sessionFactory.getCurrentSession().createSQLQuery( "insert into GENE2CS (GENE, CS, AD) values (?, ?, ?)" ) + .setParameter( 0, gene.getId() ) + .setParameter( 1, cs.getId() ) + .setParameter( 2, ad.getId() ) + .executeUpdate(); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, true ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, false ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, true ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, false ); + } + @Test public void testFindByGene() { Gene gene = new Gene(); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 8498d13bd3..1d8907a754 100644 --- 
a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -69,6 +69,7 @@ import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; +import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; @@ -136,13 +137,14 @@ public class DatasetsWebService { private OntologyService ontologyService; @Autowired private ExpressionExperimentReportService expressionExperimentReportService; - @Autowired private DatasetArgService datasetArgService; @Autowired private GeneArgService geneArgService; @Autowired private TaxonArgService taxonArgService; + @Autowired + private DifferentialExpressionResultService differentialExpressionResultService; @Context private UriInfo uriInfo; @@ -735,6 +737,47 @@ public Response getDatasetDifferentialExpressionAnalysesResultSets( .build(); } + /** + * Obtain differential expression analysis results for a given gene. 
+ */ + @GET + @GZIP + @Path("/analyses/differential/results/gene/{gene}") + @Produces(MediaType.APPLICATION_JSON) + @Operation(summary = "Retrieve the differential expression results for a given gene") + public QueriedAndFilteredResponseDataObject getDatasetsDifferentialAnalysisResultsExpressionForGene( + @PathParam("gene") GeneArg geneArg, + @QueryParam("query") QueryArg query, + @QueryParam("filter") @DefaultValue("") FilterArg filter + ) { + Gene gene = geneArgService.getEntity( geneArg ); + Filters filters = datasetArgService.getFilters( filter ); + Set ids = new HashSet<>( expressionExperimentService.loadIdsWithCache( filters, null ) ); + if ( query != null ) { + ids.retainAll( datasetArgService.getIdsForSearchQuery( query ) ); + } + List payload; + payload = differentialExpressionResultService.findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( gene, ids ).entrySet().stream() + .map( e -> new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), e.getKey() ) ) + .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) + .collect( Collectors.toList() ); + return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "datasetId" }, Sort.by( null, "pValue", Sort.Direction.ASC, "pValue" ) ); + } + + @Data + @EqualsAndHashCode(callSuper = true) + public static class DifferentialExpressionAnalysisResultByGeneValueObject extends DifferentialExpressionAnalysisResultValueObject { + + private Long datasetId; + private Long resultSetId; + + public DifferentialExpressionAnalysisResultByGeneValueObject( DifferentialExpressionAnalysisResult result, Long datasetId ) { + super( result ); + this.datasetId = datasetId; + this.resultSetId = result.getResultSet().getId(); + } + } + /** * Retrieves the annotations for the given dataset. 
* @@ -1096,46 +1139,6 @@ public ResponseDataObject> getDatase ); } - /** - * Obtain differential expression results for a given gene. - */ - @GET - @Path("/differential/genes/{gene}") - @Produces(MediaType.APPLICATION_JSON) - @Operation(summary = "Retrieve the differential expression results for a given gene") - public ResponseDataObject> getDatasetsDifferentialExpressionForGene( - @PathParam("gene") GeneArg geneArg, - @QueryParam("query") QueryArg query, - @QueryParam("filter") @DefaultValue("") FilterArg filter - ) { - Gene gene = geneArgService.getEntity( geneArg ); - Set ids = new HashSet<>( expressionExperimentService.loadIdsWithCache( datasetArgService.getFilters( filter ), null ) ); - if ( query != null ) { - ids.retainAll( datasetArgService.getIdsForSearchQuery( query ) ); - } - if ( ids.isEmpty() ) { - return respond( Collections.emptyList() ); - } - Map datasetByResult = new HashMap<>(); - return respond( differentialExpressionAnalysisService.findResultsByGene( gene, ids, datasetByResult, false, true, true ).stream() - .map( r -> new DifferentialExpressionAnalysisResultWithDatasetIdValueObject( r, datasetByResult.get( r ) ) ) - .collect( Collectors.toList() ) ); - } - - @Data - @EqualsAndHashCode(callSuper = true) - public static class DifferentialExpressionAnalysisResultWithDatasetIdValueObject extends DifferentialExpressionAnalysisResultValueObject { - - private Long datasetId; - private Long resultSetId; - - public DifferentialExpressionAnalysisResultWithDatasetIdValueObject( DifferentialExpressionAnalysisResult result, Long datasetId ) { - super( result ); - this.datasetId = datasetId; - this.resultSetId = result.getResultSet().getId(); - } - } - /** * Retrieve a "refreshed" dataset. *

diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/Responders.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/Responders.java index 0a77193b9f..c5feabdc94 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/Responders.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/Responders.java @@ -54,6 +54,10 @@ public static FilteredResponseDataObject all( List payload, @Nullable return new FilteredResponseDataObject<>( payload, filters, groupBy, sort ); } + public static QueriedAndFilteredResponseDataObject all( List payload, @Nullable String query, @Nullable Filters filters, String[] groupBy, @Nullable Sort sort ) { + return new QueriedAndFilteredResponseDataObject<>( payload, query, filters, groupBy, sort ); + } + /** * Produce top results. */ diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index cd5d5439b5..67b66afd7a 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -32,7 +32,9 @@ import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; +import ubic.gemma.model.genome.Gene; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; +import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService; import ubic.gemma.persistence.service.analysis.expression.diff.ExpressionAnalysisResultSetService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; @@ -111,6 +113,11 @@ public DifferentialExpressionAnalysisService differentialExpressionAnalysisServi return mock( 
DifferentialExpressionAnalysisService.class ); } + @Bean + public DifferentialExpressionResultService differentialExpressionResultService() { + return mock( DifferentialExpressionResultService.class ); + } + @Bean public AuditEventService auditEventService() { return mock( AuditEventService.class ); @@ -203,7 +210,10 @@ public ExpressionExperimentReportService expressionExperimentReportService() { private SearchService searchService; @Autowired - private ExpressionAnalysisResultSetService expressionAnalysisResultSetService; + private GeneArgService geneArgService; + + @Autowired + private DifferentialExpressionResultService differentialExpressionResultService; private ExpressionExperiment ee; @@ -541,12 +551,15 @@ public void testGetDatasetAnnotations() { } @Test - public void testGetDatasetsAnalysisResultSets() { - ee.setId( 1L ); - when( expressionAnalysisResultSetService.findByBioAssaySetInAndDatabaseEntryInLimit( any(), isNull(), isNull(), anyInt(), anyInt(), isNull() ) ) - .thenReturn( new Slice<>( Collections.emptyList(), null, null, null, null ) ); - assertThat( target( "/datasets/1/analyses/differential/resultSets" ).request().get() ) - .hasStatus( Response.Status.OK ); + public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { + Gene brca1 = new Gene(); + when( geneArgService.getEntity( any() ) ).thenReturn( brca1 ); + assertThat( target( "/datasets/analyses/differential/results/gene/BRCA1" ).request().get() ) + .hasStatus( Response.Status.OK ) + .entity() + .hasFieldOrPropertyWithValue( "filter", "" ) + .hasFieldOrPropertyWithValue( "sort", "+pValue" ); + verify( differentialExpressionResultService ).findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( eq( brca1 ), any() ); } @Autowired From 948265cb56b57636325a3f4b29aa8dd822549724 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 12:26:47 -0700 Subject: [PATCH 23/81] Explicitly ignore uninitialzed VO fields --- 
.../ubic/gemma/rest/DatasetsWebService.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 1d8907a754..1e759b1152 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -14,6 +14,7 @@ */ package ubic.gemma.rest; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; import io.swagger.v3.oas.annotations.Operation; @@ -68,6 +69,7 @@ import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.TaxonValueObject; +import ubic.gemma.model.genome.gene.GeneValueObject; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; @@ -776,6 +778,24 @@ public DifferentialExpressionAnalysisResultByGeneValueObject( DifferentialExpres this.datasetId = datasetId; this.resultSetId = result.getResultSet().getId(); } + + @Override + @JsonIgnore + public Long getProbeId() { + return super.getProbeId(); + } + + @Override + @JsonIgnore + public String getProbeName() { + return super.getProbeName(); + } + + @Override + @JsonIgnore + public List getGenes() { + return super.getGenes(); + } } /** From ab93eee2c3705ddd86f08c995013f74dc4a27145 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 12:41:56 -0700 Subject: [PATCH 24/81] Retain both source experiment and experiment IDs --- .../diff/DifferentialExpressionResultDao.java | 5 ++++- .../diff/DifferentialExpressionResultDaoImpl.java | 11 +++++++++-- 
.../diff/DifferentialExpressionResultService.java | 2 +- .../DifferentialExpressionResultServiceImpl.java | 4 ++-- .../diff/DifferentialExpressionResultDaoTest.java | 12 ++++++++---- .../java/ubic/gemma/rest/DatasetsWebService.java | 14 ++++++++------ .../ubic/gemma/rest/DatasetsWebServiceTest.java | 2 +- 7 files changed, 33 insertions(+), 17 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java index c107796db7..bb2b5db705 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java @@ -25,6 +25,7 @@ import ubic.gemma.model.genome.Gene; import ubic.gemma.persistence.service.BaseDao; +import javax.annotation.Nullable; import java.util.Collection; import java.util.List; import java.util.Map; @@ -41,9 +42,11 @@ public interface DifferentialExpressionResultDao extends BaseDao> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment ); + Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment, @Nullable Map experimentAnalyzedIdMap ); /** * Find differential expression for a gene in given data sets, exceeding a given significance level (using the diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index e67ea5adda..cb22f81da3 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -39,6 +39,7 @@ import ubic.gemma.persistence.util.CommonQueries; import ubic.gemma.persistence.util.QueryUtils; +import javax.annotation.Nullable; import java.math.BigInteger; import java.util.*; import java.util.stream.Collectors; @@ -87,8 +88,10 @@ public void remove( DifferentialExpressionAnalysisResult entity ) { } @Override - public Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment ) { + public Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment, @Nullable Map experimentAnalyzedIdMap ) { Assert.notNull( gene.getId(), "The gene must have a non-null ID." ); + Assert.isTrue( groupBySourceExperiment || experimentAnalyzedIdMap == null, + "The experiment analyzed ID mapping is only useful if results are grouped by source experiment." 
); if ( experimentAnalyzedIds.isEmpty() ) { return Collections.emptyMap(); } @@ -131,9 +134,13 @@ public Map> findByGeneAndExperi Map> rs = new HashMap<>(); for ( Object[] row : result ) { DifferentialExpressionAnalysisResult r = ( DifferentialExpressionAnalysisResult ) row[0]; + Long bioAssaySetId = ( Long ) row[1]; Long key; if ( groupBySourceExperiment ) { - key = subsetIdToExperimentId.getOrDefault( ( Long ) row[1], ( Long ) row[1] ); + key = subsetIdToExperimentId.getOrDefault( bioAssaySetId, bioAssaySetId ); + if ( experimentAnalyzedIdMap != null ) { + experimentAnalyzedIdMap.put( r, bioAssaySetId ); + } } else { key = ( Long ) row[1]; } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java index 79340f4d29..da9d712e1f 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java @@ -47,7 +47,7 @@ public interface DifferentialExpressionResultService extends BaseReadOnlyService * @return the best analysis results grouped by source experiment ID */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) - Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds ); + Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap ); /** * Given a list of experiments and a threshold value finds all the probes that met the cut off in the given diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java index f23637a560..c669611319 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java @@ -48,9 +48,9 @@ public DifferentialExpressionResultServiceImpl( DifferentialExpressionResultDao @Override @Transactional(readOnly = true) - public Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds ) { + public Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap ) { StopWatch timer = StopWatch.createStarted(); - Map> resultsBySourceExperiment = DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, true ); + Map> resultsBySourceExperiment = DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, true, bioAssaySetIdMap ); Map bestResults = new HashMap<>(); for ( Map.Entry> e : resultsBySourceExperiment.entrySet() ) { DifferentialExpressionAnalysisResult bestResult = e.getValue().stream() diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java index d2ceeee024..573d4aeeed 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java @@ -15,7 +15,9 @@ import ubic.gemma.model.genome.Taxon; import java.util.Collections; +import java.util.HashMap; +import static
org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.mock; @ContextConfiguration @@ -51,10 +53,12 @@ public void testFindByGeneAndExperimentAnalyzedGroupingBySourceExperiment() { .setParameter( 1, cs.getId() ) .setParameter( 2, ad.getId() ) .executeUpdate(); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, true ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, false ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, true ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, false ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, true, null ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, false, null ); + assertThatThrownBy( () -> differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, false, new HashMap<>() ) ) + .isInstanceOf( IllegalArgumentException.class ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, true, null ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, false, null ); } @Test diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 1e759b1152..8a8ec04ea7 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -758,9 +758,9 @@ public QueriedAndFilteredResponseDataObject payload; - payload = differentialExpressionResultService.findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( gene, ids 
).entrySet().stream() - .map( e -> new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), e.getKey() ) ) + Map bioAssaySetIdMap = new HashMap<>(); + List payload = differentialExpressionResultService.findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( gene, ids, bioAssaySetIdMap ).entrySet().stream() + .map( e -> new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), e.getKey(), bioAssaySetIdMap.get( e.getValue() ) ) ) .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .collect( Collectors.toList() ); return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "datasetId" }, Sort.by( null, "pValue", Sort.Direction.ASC, "pValue" ) ); @@ -770,12 +770,14 @@ public QueriedAndFilteredResponseDataObject Date: Tue, 7 May 2024 12:50:25 -0700 Subject: [PATCH 25/81] Fix grouping by field and add some documentation --- .../java/ubic/gemma/rest/DatasetsWebService.java | 13 ++++++++++++- .../ubic/gemma/rest/DatasetsWebServiceTest.java | 14 +++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 8a8ec04ea7..47348e84b2 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -65,6 +65,7 @@ import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; +import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; @@ 
-763,15 +764,25 @@ public QueriedAndFilteredResponseDataObject new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), e.getKey(), bioAssaySetIdMap.get( e.getValue() ) ) ) .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .collect( Collectors.toList() ); - return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "datasetId" }, Sort.by( null, "pValue", Sort.Direction.ASC, "pValue" ) ); + return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "sourceExperimentId" }, Sort.by( null, "pValue", Sort.Direction.ASC, "pValue" ) ); } @Data @EqualsAndHashCode(callSuper = true) public static class DifferentialExpressionAnalysisResultByGeneValueObject extends DifferentialExpressionAnalysisResultValueObject { + /** + * The ID of the source experiment, which differs only if this result is from a subset. This is always referring + * to an {@link ExpressionExperiment}. + */ private Long sourceExperimentId; + /** + * The ID of the experiment analyzed which is either an {@link ExpressionExperiment} or an {@link ExpressionExperimentSubSet}. + */ private Long experimentAnalyzedId; + /** + * The result set ID to which this result belong. 
+ */ private Long resultSetId; public DifferentialExpressionAnalysisResultByGeneValueObject( DifferentialExpressionAnalysisResult result, Long sourceExperimentId, Long experimentAnalyzedId ) { diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index e6fe3a2de8..890c3d0dab 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -2,7 +2,6 @@ import io.swagger.v3.oas.models.OpenAPI; import io.swagger.v3.oas.models.info.Info; -import org.assertj.core.api.InstanceOfAssertFactories; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -60,6 +59,7 @@ import java.util.stream.Collectors; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.InstanceOfAssertFactories.list; import static org.mockito.Mockito.*; import static ubic.gemma.rest.util.Assertions.assertThat; @@ -380,7 +380,7 @@ public void testGetDatasetsAnnotationsWithRetainMentionedTerms() throws TimeoutE .hasFieldOrPropertyWithValue( "limit", 100 ) .hasFieldOrPropertyWithValue( "sort.orderBy", "numberOfExpressionExperiments" ) .hasFieldOrPropertyWithValue( "sort.direction", "-" ) - .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) + .extracting( "groupBy", list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), Collections.emptySet(), new HashSet<>(), 30, TimeUnit.SECONDS ); verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, Collections.emptySet(), 100 ); @@ -396,7 +396,7 @@ public void testGetDatasetsAnnotations() throws TimeoutException { .hasFieldOrPropertyWithValue( "limit", 100 ) .hasFieldOrPropertyWithValue( "sort.orderBy", 
"numberOfExpressionExperiments" ) .hasFieldOrPropertyWithValue( "sort.direction", "-" ) - .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) + .extracting( "groupBy", list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null, new HashSet<>(), 30, TimeUnit.SECONDS ); verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, null, 100 ); @@ -428,7 +428,7 @@ public void testGetDatasetsAnnotationsWithLimitIsSupplied() { .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ) .entity() .hasFieldOrPropertyWithValue( "limit", 50 ) - .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) + .extracting( "groupBy", list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, null, 50 ); } @@ -556,9 +556,13 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { when( geneArgService.getEntity( any() ) ).thenReturn( brca1 ); assertThat( target( "/datasets/analyses/differential/results/gene/BRCA1" ).request().get() ) .hasStatus( Response.Status.OK ) + .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ) + .hasEncoding( "gzip" ) .entity() .hasFieldOrPropertyWithValue( "filter", "" ) - .hasFieldOrPropertyWithValue( "sort", "+pValue" ); + .hasFieldOrPropertyWithValue( "sort", "+pValue" ) + .extracting( "groupBy", list( String.class ) ) + .containsExactly( "sourceExperimentId" ); verify( differentialExpressionResultService ).findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( eq( brca1 ), any(), any() ); } From dd200183a1212696e0225879217d70600c236073 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 12:54:30 -0700 Subject: [PATCH 26/81] 
Fix call to findByGeneAndExperimentAnalyzed() --- .../expression/diff/DifferentialExpressionResultService.java | 2 +- .../diff/DifferentialExpressionResultServiceImpl.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java index da9d712e1f..d1e38024d9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java @@ -43,7 +43,7 @@ public interface DifferentialExpressionResultService extends BaseReadOnlyService *

* If a source experiment has more than one result for a given gene (i.e. multiple probe for the gene or multiple * result sets), the best is picked according to its P-value. - * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, boolean) + * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, boolean, Map) * @return the best analysis results grouped by source experiment ID */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java index c669611319..ef15cccadb 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java @@ -50,7 +50,7 @@ public DifferentialExpressionResultServiceImpl( DifferentialExpressionResultDao @Transactional(readOnly = true) public Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap ) { StopWatch timer = StopWatch.createStarted(); - Map> resultsBySourceExperiment = DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, bioAssaySetIdMap, true, true ); + Map> resultsBySourceExperiment = DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, true, bioAssaySetIdMap ); Map bestResults = new HashMap<>(); for ( Map.Entry> e : resultsBySourceExperiment.entrySet() ) { DifferentialExpressionAnalysisResult bestResult = e.getValue().stream() From becbc079b9fcdbc672346e949944b1d84ede882d Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 13:05:10 -0700 Subject: 
[PATCH 27/81] Use corrected P-values for picking the best result by dataset --- .../expression/diff/DifferentialExpressionResultService.java | 2 +- .../diff/DifferentialExpressionResultServiceImpl.java | 2 +- .../src/main/java/ubic/gemma/rest/DatasetsWebService.java | 2 +- .../src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java index d1e38024d9..88590e6a80 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java @@ -42,7 +42,7 @@ public interface DifferentialExpressionResultService extends BaseReadOnlyService * Retrieve the best differential expression results for a given gene. *

* If a source experiment has more than one result for a given gene (i.e. multiple probe for the gene or multiple - * result sets), the best is picked according to its P-value. + * result sets), the best is picked according to its corrected P-value. * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, boolean, Map) * @return the best analysis results grouped by source experiment ID */ diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java index ef15cccadb..b8fd29924a 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java @@ -54,7 +54,7 @@ public Map findBestResultByGeneAndEx Map bestResults = new HashMap<>(); for ( Map.Entry> e : resultsBySourceExperiment.entrySet() ) { DifferentialExpressionAnalysisResult bestResult = e.getValue().stream() - .min( Comparator.comparing( DifferentialExpressionAnalysisResult::getPvalue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) + .min( Comparator.comparing( DifferentialExpressionAnalysisResult::getCorrectedPvalue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .orElseThrow( IllegalStateException::new ); bestResults.put( e.getKey(), bestResult ); } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 47348e84b2..e5bbaa948f 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -764,7 +764,7 @@ public QueriedAndFilteredResponseDataObject new 
DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), e.getKey(), bioAssaySetIdMap.get( e.getValue() ) ) ) .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .collect( Collectors.toList() ); - return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "sourceExperimentId" }, Sort.by( null, "pValue", Sort.Direction.ASC, "pValue" ) ); + return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "sourceExperimentId" }, Sort.by( null, "correctedPvalue", Sort.Direction.ASC, "correctedPvalue" ) ); } @Data diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 890c3d0dab..a69b996677 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -560,7 +560,7 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { .hasEncoding( "gzip" ) .entity() .hasFieldOrPropertyWithValue( "filter", "" ) - .hasFieldOrPropertyWithValue( "sort", "+pValue" ) + .hasFieldOrPropertyWithValue( "sort", "+correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) .containsExactly( "sourceExperimentId" ); verify( differentialExpressionResultService ).findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( eq( brca1 ), any(), any() ); From b585773bfa87b3bdb1c57c9cb05b124955a51cbf Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 13:48:46 -0700 Subject: [PATCH 28/81] Include all subset results --- .../diff/DifferentialExpressionResultDao.java | 17 +++++------ .../DifferentialExpressionResultDaoImpl.java | 30 +++++++++---------- .../DifferentialExpressionResultService.java | 9 ++---- ...fferentialExpressionResultServiceImpl.java | 21 
++++--------- .../DifferentialExpressionResultDaoTest.java | 12 +++----- .../ubic/gemma/rest/DatasetsWebService.java | 8 ++--- .../gemma/rest/DatasetsWebServiceTest.java | 4 +-- 7 files changed, 40 insertions(+), 61 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java index bb2b5db705..2274c03d6c 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java @@ -37,16 +37,15 @@ public interface DifferentialExpressionResultDao extends BaseDao + * If a gene maps to more than one probe, the result with the lowest corrected P-value is selected. + * @param gene a specific gene to retrieve differential expression for + * @param experimentAnalyzedIds list of IDs of experiments or experiment subsets to consider + * @param includeSubsets include results from experiment subsets + * @param sourceExperimentIdMap a mapping of results to source experiment ID + * @return differential expression results, grouped by analyzed experiment ID */ - Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment, @Nullable Map experimentAnalyzedIdMap ); + Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap ); /** * Find differential expression for a gene in given data sets, exceeding a given significance level (using the diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index cb22f81da3..19a4b75898 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -88,13 +88,12 @@ public void remove( DifferentialExpressionAnalysisResult entity ) { } @Override - public Map> findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, boolean groupBySourceExperiment, @Nullable Map experimentAnalyzedIdMap ) { + public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap ) { Assert.notNull( gene.getId(), "The gene must have a non-null ID." ); - Assert.isTrue( groupBySourceExperiment || experimentAnalyzedIdMap == null, - "The experiment analyzed ID mapping is only useful if results are grouped by source experiment." 
); if ( experimentAnalyzedIds.isEmpty() ) { return Collections.emptyMap(); } + StopWatch timer = StopWatch.createStarted(); //noinspection unchecked List probeIds = getSessionFactory().getCurrentSession() .createSQLQuery( "select CS from GENE2CS where GENE = :geneId" ) @@ -108,7 +107,7 @@ public Map> findByGeneAndExperi Set bioAssaySetIds = new HashSet<>( experimentAnalyzedIds ); Map subsetIdToExperimentId = null; // create a mapping of subset ID to source experiment ID - if ( groupBySourceExperiment ) { + if ( sourceExperimentIdMap != null ) { subsetIdToExperimentId = QueryUtils.streamByBatch( getSessionFactory().getCurrentSession() .createQuery( "select eess.id, eess.sourceExperiment.id from ExpressionExperimentSubSet eess" + " where eess.sourceExperiment.id in :eeIds or eess.id in :eeIds" ), "eeIds", experimentAnalyzedIds, 2048, Object[].class ) @@ -128,23 +127,24 @@ public Map> findByGeneAndExperi + "join dear.resultSet dears " + "join dears.analysis dea " + "join dea.experimentAnalyzed e " - + "where dear.probe.id in :probeIds and e.id in :bioAssaySetIds" ) + + "where dear.probe.id in :probeIds and e.id in :bioAssaySetIds " + // if more than one probe is found, pick the one with the lowest corrected p-value + + "group by e order by dear.correctedPvalue" ) .setParameterList( "probeIds", optimizeParameterList( probeIds ) ); List result = QueryUtils.listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048 ); - Map> rs = new HashMap<>(); + Map rs = new HashMap<>(); for ( Object[] row : result ) { DifferentialExpressionAnalysisResult r = ( DifferentialExpressionAnalysisResult ) row[0]; Long bioAssaySetId = ( Long ) row[1]; - Long key; - if ( groupBySourceExperiment ) { - key = subsetIdToExperimentId.getOrDefault( bioAssaySetId, bioAssaySetId ); - if ( experimentAnalyzedIdMap != null ) { - experimentAnalyzedIdMap.put( r, bioAssaySetId ); - } - } else { - key = ( Long ) row[1]; + rs.put( bioAssaySetId, r ); + if ( sourceExperimentIdMap != null ) { + 
sourceExperimentIdMap.put( r, subsetIdToExperimentId.getOrDefault( bioAssaySetId, bioAssaySetId ) ); } - rs.computeIfAbsent( key, k -> new ArrayList<>() ).add( r ); + } + // pick the best result by experiment + if ( timer.getTime() > 1000 ) { + log.warn( String.format( "Retrieving %d diffex results for %s took %d ms", + rs.size(), gene, timer.getTime() ) ); } return rs; } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java index 88590e6a80..fd48b9bdbc 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java @@ -39,15 +39,10 @@ public interface DifferentialExpressionResultService extends BaseReadOnlyService { /** - * Retrieve the best differential expression results for a given gene. - *

- * If a source experiment has more than one result for a given gene (i.e. multiple probe for the gene or multiple - * result sets), the best is picked according to its corrected P-value. - * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, boolean, Map) - * @return the best analysis results grouped by source experiment ID + * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, Map) */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) - Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap ); + Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap ); /** * Given a list of experiments and a threshold value finds all the probes that met the cut off in the given diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java index b8fd29924a..647b146b71 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java @@ -28,7 +28,9 @@ import ubic.gemma.model.genome.Gene; import ubic.gemma.persistence.service.AbstractService; -import java.util.*; +import java.util.Collection; +import java.util.List; +import java.util.Map; /** * @author keshav @@ -48,21 +50,8 @@ public DifferentialExpressionResultServiceImpl( DifferentialExpressionResultDao @Override @Transactional(readOnly = true) - public Map findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap ) { - StopWatch timer = 
StopWatch.createStarted(); - Map> resultsBySourceExperiment = DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, true, bioAssaySetIdMap ); - Map bestResults = new HashMap<>(); - for ( Map.Entry> e : resultsBySourceExperiment.entrySet() ) { - DifferentialExpressionAnalysisResult bestResult = e.getValue().stream() - .min( Comparator.comparing( DifferentialExpressionAnalysisResult::getCorrectedPvalue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) - .orElseThrow( IllegalStateException::new ); - bestResults.put( e.getKey(), bestResult ); - } - if ( timer.getTime() > 1000 ) { - log.warn( String.format( "Retrieving %d diffex results for %s took %d ms", - bestResults.size(), gene, timer.getTime() ) ); - } - return bestResults; + public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map sourceExperimentIdMap ) { + return DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, sourceExperimentIdMap ); } @Override diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java index 573d4aeeed..88c8d047a2 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java @@ -15,9 +15,7 @@ import ubic.gemma.model.genome.Taxon; import java.util.Collections; -import java.util.HashMap; -import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.mock; @ContextConfiguration @@ -53,12 +51,10 @@ public void testFindByGeneAndExperimentAnalyzedGroupingBySourceExperiment() { .setParameter( 1, cs.getId() ) .setParameter( 2, ad.getId() ) .executeUpdate(); - 
differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, true, null ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, false, null ); - assertThatThrownBy( () -> differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, false, new HashMap<>() ) ) - .isInstanceOf( IllegalArgumentException.class ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, true, null ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, false, null ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null ); } @Test diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index e5bbaa948f..b30ede89ab 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -759,12 +759,12 @@ public QueriedAndFilteredResponseDataObject bioAssaySetIdMap = new HashMap<>(); - List payload = differentialExpressionResultService.findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( gene, ids, bioAssaySetIdMap ).entrySet().stream() - .map( e -> new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), e.getKey(), bioAssaySetIdMap.get( e.getValue() ) ) ) + Map sourceExperimentIdMap = new HashMap<>(); + List payload = 
differentialExpressionResultService.findByGeneAndExperimentAnalyzed( gene, ids, sourceExperimentIdMap ).entrySet().stream() + .map( e -> new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), sourceExperimentIdMap.get( e.getValue() ), e.getKey() ) ) .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .collect( Collectors.toList() ); - return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "sourceExperimentId" }, Sort.by( null, "correctedPvalue", Sort.Direction.ASC, "correctedPvalue" ) ); + return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "sourceExperimentId", "experimentAnalyzedId" }, Sort.by( null, "correctedPvalue", Sort.Direction.ASC, "correctedPvalue" ) ); } @Data diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index a69b996677..9038ba25eb 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -562,8 +562,8 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { .hasFieldOrPropertyWithValue( "filter", "" ) .hasFieldOrPropertyWithValue( "sort", "+correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) - .containsExactly( "sourceExperimentId" ); - verify( differentialExpressionResultService ).findBestResultByGeneAndExperimentAnalyzedGroupedBySourceExperimentId( eq( brca1 ), any(), any() ); + .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); + verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any() ); } @Autowired From d189aee10773420287867ee2c8775bf6513cb486 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 15:00:31 -0700 
Subject: [PATCH 29/81] Add a taxa parameter to disambiguate genes with identical symbols --- .../ubic/gemma/rest/DatasetsWebService.java | 50 ++++++++++++++++--- .../gemma/rest/DatasetsWebServiceTest.java | 24 ++++++++- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index b30ede89ab..66b9a23c27 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -753,7 +753,34 @@ public QueriedAndFilteredResponseDataObject filter ) { - Gene gene = geneArgService.getEntity( geneArg ); + return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( null, geneArg, query, filter ); + } + + /** + * Obtain differential expression analysis results for a given gene in a given taxa. + */ + @GET + @GZIP + @Path("/analyses/differential/results/taxa/{taxa}/gene/{gene}") + @Produces(MediaType.APPLICATION_JSON) + @Operation(summary = "Retrieve the differential expression results for a given gene and taxa") + public QueriedAndFilteredResponseDataObject getDatasetsDifferentialAnalysisResultsExpressionForGeneInTaxa( + @PathParam("taxa") TaxonArg taxonArg, + @PathParam("gene") GeneArg geneArg, + @QueryParam("query") QueryArg query, + @QueryParam("filter") @DefaultValue("") FilterArg filter + ) { + return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( taxonArg, geneArg, query, filter ); + } + + private QueriedAndFilteredResponseDataObject getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( @Nullable TaxonArg taxonArg, GeneArg geneArg, QueryArg query, FilterArg filter ) { + Gene gene; + if ( taxonArg != null ) { + Taxon taxon = taxonArgService.getEntity( taxonArg ); + gene = geneArgService.getEntityWithTaxon( geneArg, taxon ); + } else { + gene = geneArgService.getEntity( geneArg ); + } Filters filters = 
datasetArgService.getFilters( filter ); Set ids = new HashSet<>( expressionExperimentService.loadIdsWithCache( filters, null ) ); if ( query != null ) { @@ -1075,8 +1102,10 @@ public ResponseDataObject> getDatase public ResponseDataObject> getDatasetExpressionForGenes( // Params: @PathParam("datasets") DatasetArrayArg datasets, // Required @PathParam("genes") GeneArrayArg genes, // Required - @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean keepNonSpecific, // Optional, default false - @QueryParam("consolidate") ExpLevelConsolidationArg consolidate // Optional, default everything is returned + @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean + keepNonSpecific, // Optional, default false + @QueryParam("consolidate") ExpLevelConsolidationArg + consolidate // Optional, default everything is returned ) { return respond( processedExpressionDataVectorService .getExpressionLevels( datasetArgService.getEntities( datasets ), @@ -1116,8 +1145,10 @@ public ResponseDataObject> getDatase @PathParam("datasets") DatasetArrayArg datasets, // Required @QueryParam("component") @DefaultValue("1") Integer component, // Required, default 1 @QueryParam("limit") @DefaultValue("100") LimitArg limit, // Optional, default 100 - @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean keepNonSpecific, // Optional, default false - @QueryParam("consolidate") ExpLevelConsolidationArg consolidate // Optional, default everything is returned + @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean + keepNonSpecific, // Optional, default false + @QueryParam("consolidate") ExpLevelConsolidationArg + consolidate // Optional, default everything is returned ) { return respond( processedExpressionDataVectorService .getExpressionLevelsPca( datasetArgService.getEntities( datasets ), limit.getValueNoMaximum(), @@ -1154,13 +1185,16 @@ public ResponseDataObject> getDatase @Path("/{datasets}/expressions/differential") @Produces(MediaType.APPLICATION_JSON) 
@Operation(summary = "Retrieve the expression levels of a set of datasets subject to a threshold on their differential expressions") - public ResponseDataObject> getDatasetDifferentialExpression( // Params: + public ResponseDataObject> getDatasetDifferentialExpression + ( // Params: @PathParam("datasets") DatasetArrayArg datasets, // Required @QueryParam("diffExSet") Long diffExSet, // Required @QueryParam("threshold") @DefaultValue("1.0") Double threshold, // Optional, default 1.0 @QueryParam("limit") @DefaultValue("100") LimitArg limit, // Optional, default 100 - @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean keepNonSpecific, // Optional, default false - @QueryParam("consolidate") ExpLevelConsolidationArg consolidate // Optional, default everything is returned + @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean + keepNonSpecific, // Optional, default false + @QueryParam("consolidate") ExpLevelConsolidationArg + consolidate // Optional, default everything is returned ) { if ( diffExSet == null ) { throw new BadRequestException( "The 'diffExSet' query parameter must be supplied." 
); diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 9038ba25eb..352c1e48d9 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -32,6 +32,7 @@ import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Gene; +import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService; import ubic.gemma.persistence.service.analysis.expression.diff.ExpressionAnalysisResultSetService; @@ -209,6 +210,9 @@ public ExpressionExperimentReportService expressionExperimentReportService() { @Autowired private SearchService searchService; + @Autowired + private TaxonArgService taxonArgService; + @Autowired private GeneArgService geneArgService; @@ -231,7 +235,7 @@ public void setUpMocks() throws TimeoutException { @After public void resetMocks() { - reset( expressionExperimentService, quantitationTypeService, analyticsProvider, expressionDataFileService ); + reset( expressionExperimentService, quantitationTypeService, analyticsProvider, expressionDataFileService, taxonArgService, geneArgService ); } @Test @@ -566,6 +570,24 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any() ); } + @Test + public void testGetDatasetsDifferentialAnalysisResultsExpressionForGeneInTaxa() { + Taxon human = new Taxon(); + Gene brca1 = new Gene(); + when( taxonArgService.getEntity( any() ) ).thenReturn( human ); + when( geneArgService.getEntityWithTaxon( any(), eq( human ) ) 
).thenReturn( brca1 ); + assertThat( target( "/datasets/analyses/differential/results/taxa/human/gene/BRCA1" ).request().get() ) + .hasStatus( Response.Status.OK ) + .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ) + .hasEncoding( "gzip" ) + .entity() + .hasFieldOrPropertyWithValue( "filter", "" ) + .hasFieldOrPropertyWithValue( "sort", "+correctedPvalue" ) + .extracting( "groupBy", list( String.class ) ) + .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); + verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any() ); + } + @Autowired private ExpressionExperimentReportService expressionExperimentReportService; From 0e3c61728ba60a9e950a5f22e20fe51fdcec4524 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 7 May 2024 15:23:39 -0700 Subject: [PATCH 30/81] Add a threshold parameter for the corrected P-value --- .../diff/DifferentialExpressionResultDao.java | 4 +++- .../DifferentialExpressionResultDaoImpl.java | 8 +++++--- .../DifferentialExpressionResultService.java | 4 ++-- ...DifferentialExpressionResultServiceImpl.java | 5 ++--- .../DifferentialExpressionResultDaoTest.java | 13 +++++++++---- .../ubic/gemma/rest/DatasetsWebService.java | 17 +++++++++++------ .../ubic/gemma/rest/DatasetsWebServiceTest.java | 4 ++-- 7 files changed, 34 insertions(+), 21 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java index 2274c03d6c..3a00896d17 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java @@ -39,13 +39,15 @@ public interface DifferentialExpressionResultDao extends BaseDao * 
If a gene maps to more than one probe, the result with the lowest corrected P-value is selected. + * * @param gene a specific gene to retrieve differential expression for * @param experimentAnalyzedIds list of IDs of experiments or experiment subsets to consider * @param includeSubsets include results from experiment subsets * @param sourceExperimentIdMap a mapping of results to source experiment ID + * @param threshold a maximum threshold on the corrected P-value, between 0 and 1 inclusively * @return differential expression results, grouped by analyzed experiment ID */ - Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap ); + Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap, double threshold ); /** * Find differential expression for a gene in given data sets, exceeding a given significance level (using the diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index 19a4b75898..ba838d4394 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -88,8 +88,9 @@ public void remove( DifferentialExpressionAnalysisResult entity ) { } @Override - public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap ) { + public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap, double threshold ) { Assert.notNull( 
gene.getId(), "The gene must have a non-null ID." ); + Assert.isTrue( threshold >= 0.0 && threshold <= 1.0, "Threshold must be in the [0, 1] interval." ); if ( experimentAnalyzedIds.isEmpty() ) { return Collections.emptyMap(); } @@ -127,10 +128,11 @@ public Map findByGeneAndExperimentAn + "join dear.resultSet dears " + "join dears.analysis dea " + "join dea.experimentAnalyzed e " - + "where dear.probe.id in :probeIds and e.id in :bioAssaySetIds " + + "where dear.probe.id in :probeIds and e.id in :bioAssaySetIds and dear.correctedPvalue <= :threshold " // if more than one probe is found, pick the one with the lowest corrected p-value + "group by e order by dear.correctedPvalue" ) - .setParameterList( "probeIds", optimizeParameterList( probeIds ) ); + .setParameterList( "probeIds", optimizeParameterList( probeIds ) ) + .setParameter( "threshold", threshold ); List result = QueryUtils.listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048 ); Map rs = new HashMap<>(); for ( Object[] row : result ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java index fd48b9bdbc..d5ecb3b3e3 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java @@ -39,10 +39,10 @@ public interface DifferentialExpressionResultService extends BaseReadOnlyService { /** - * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, Map) + * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, Map, double) */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) - Map findByGeneAndExperimentAnalyzed( Gene gene, Collection 
experimentAnalyzedIds, Map bioAssaySetIdMap ); + Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap, double threshold ); /** * Given a list of experiments and a threshold value finds all the probes that met the cut off in the given diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java index 647b146b71..a3c38b8373 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java @@ -18,7 +18,6 @@ */ package ubic.gemma.persistence.service.analysis.expression.diff; -import org.apache.commons.lang3.time.StopWatch; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -50,8 +49,8 @@ public DifferentialExpressionResultServiceImpl( DifferentialExpressionResultDao @Override @Transactional(readOnly = true) - public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map sourceExperimentIdMap ) { - return DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, sourceExperimentIdMap ); + public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map sourceExperimentIdMap, double threshold ) { + return DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, sourceExperimentIdMap, threshold ); } @Override diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java 
b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java index 88c8d047a2..faf812076b 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java @@ -16,6 +16,7 @@ import java.util.Collections; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.mock; @ContextConfiguration @@ -51,10 +52,14 @@ public void testFindByGeneAndExperimentAnalyzedGroupingBySourceExperiment() { .setParameter( 1, cs.getId() ) .setParameter( 2, ad.getId() ) .executeUpdate(); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.0 ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.0 ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null, 1.0 ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null, 1.0 ); + assertThatThrownBy( () -> differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.2 ) ) + .isInstanceOf( IllegalArgumentException.class ); + assertThatThrownBy( () -> 
differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, -1 ) ) + .isInstanceOf( IllegalArgumentException.class ); } @Test diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 66b9a23c27..55ca168278 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -751,9 +751,10 @@ public Response getDatasetDifferentialExpressionAnalysesResultSets( public QueriedAndFilteredResponseDataObject getDatasetsDifferentialAnalysisResultsExpressionForGene( @PathParam("gene") GeneArg geneArg, @QueryParam("query") QueryArg query, - @QueryParam("filter") @DefaultValue("") FilterArg filter + @QueryParam("filter") @DefaultValue("") FilterArg filter, + @QueryParam("threshold") @DefaultValue("1.0") Double threshold ) { - return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( null, geneArg, query, filter ); + return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( null, geneArg, query, filter, threshold ); } /** @@ -768,12 +769,13 @@ public QueriedAndFilteredResponseDataObject taxonArg, @PathParam("gene") GeneArg geneArg, @QueryParam("query") QueryArg query, - @QueryParam("filter") @DefaultValue("") FilterArg filter + @QueryParam("filter") @DefaultValue("") FilterArg filter, + @QueryParam("threshold") @DefaultValue("1.0") Double threshold ) { - return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( taxonArg, geneArg, query, filter ); + return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( taxonArg, geneArg, query, filter, threshold ); } - private QueriedAndFilteredResponseDataObject getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( @Nullable TaxonArg taxonArg, GeneArg geneArg, QueryArg query, FilterArg filter ) { + private QueriedAndFilteredResponseDataObject 
getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( @Nullable TaxonArg taxonArg, GeneArg geneArg, QueryArg query, FilterArg filter, double threshold ) { Gene gene; if ( taxonArg != null ) { Taxon taxon = taxonArgService.getEntity( taxonArg ); @@ -782,12 +784,15 @@ private QueriedAndFilteredResponseDataObject 1 ) { + throw new BadRequestException( "The threshold must be in the [0, 1] interval." ); + } Set ids = new HashSet<>( expressionExperimentService.loadIdsWithCache( filters, null ) ); if ( query != null ) { ids.retainAll( datasetArgService.getIdsForSearchQuery( query ) ); } Map sourceExperimentIdMap = new HashMap<>(); - List payload = differentialExpressionResultService.findByGeneAndExperimentAnalyzed( gene, ids, sourceExperimentIdMap ).entrySet().stream() + List payload = differentialExpressionResultService.findByGeneAndExperimentAnalyzed( gene, ids, sourceExperimentIdMap, threshold ).entrySet().stream() .map( e -> new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), sourceExperimentIdMap.get( e.getValue() ), e.getKey() ) ) .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .collect( Collectors.toList() ); diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 352c1e48d9..daeb912f85 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -567,7 +567,7 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { .hasFieldOrPropertyWithValue( "sort", "+correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); - verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any() ); + verify( 
differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble() ); } @Test @@ -585,7 +585,7 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGeneInTaxa() .hasFieldOrPropertyWithValue( "sort", "+correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); - verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any() ); + verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble() ); } @Autowired From 9037205705e0061e9202a3e8a7deeee81dff51ec Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 10:56:57 -0700 Subject: [PATCH 31/81] Fix tests --- .../test/java/ubic/gemma/rest/DatasetsWebServiceTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index daeb912f85..b0da96ef6a 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -564,7 +564,8 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { .hasEncoding( "gzip" ) .entity() .hasFieldOrPropertyWithValue( "filter", "" ) - .hasFieldOrPropertyWithValue( "sort", "+correctedPvalue" ) + .hasFieldOrPropertyWithValue( "sort.direction", "+" ) + .hasFieldOrPropertyWithValue( "sort.orderBy", "correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble() ); @@ -582,7 +583,8 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGeneInTaxa() .hasEncoding( "gzip" ) .entity() 
.hasFieldOrPropertyWithValue( "filter", "" ) - .hasFieldOrPropertyWithValue( "sort", "+correctedPvalue" ) + .hasFieldOrPropertyWithValue( "sort.direction", "+" ) + .hasFieldOrPropertyWithValue( "sort.orderBy", "correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble() ); From 7b37348099f6b3c42ff630d312f5d82cda4d5a78 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 10:59:28 -0700 Subject: [PATCH 32/81] Undo some cosmetic changes --- ...erentialExpressionAnalysisServiceImpl.java | 7 ++++--- .../ubic/gemma/rest/DatasetsWebService.java | 21 +++++++------------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java index ab4dda750d..693ccb2810 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisServiceImpl.java @@ -26,7 +26,10 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import ubic.gemma.core.tasks.analysis.diffex.DifferentialExpressionAnalysisTask; -import ubic.gemma.model.analysis.expression.diff.*; +import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; +import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; +import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; +import 
ubic.gemma.model.analysis.expression.diff.GeneDifferentialExpressionMetaAnalysis; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Gene; @@ -35,9 +38,7 @@ import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentDao; import ubic.gemma.persistence.util.EntityUtils; -import javax.annotation.Nullable; import java.util.*; -import java.util.stream.Collectors; /** * @author paul diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 55ca168278..7a8903fd93 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -1107,10 +1107,8 @@ public ResponseDataObject> getDatase public ResponseDataObject> getDatasetExpressionForGenes( // Params: @PathParam("datasets") DatasetArrayArg datasets, // Required @PathParam("genes") GeneArrayArg genes, // Required - @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean - keepNonSpecific, // Optional, default false - @QueryParam("consolidate") ExpLevelConsolidationArg - consolidate // Optional, default everything is returned + @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean keepNonSpecific, // Optional, default false + @QueryParam("consolidate") ExpLevelConsolidationArg consolidate // Optional, default everything is returned ) { return respond( processedExpressionDataVectorService .getExpressionLevels( datasetArgService.getEntities( datasets ), @@ -1150,10 +1148,8 @@ public ResponseDataObject> getDatase @PathParam("datasets") DatasetArrayArg datasets, // Required @QueryParam("component") @DefaultValue("1") Integer component, // Required, default 1 @QueryParam("limit") @DefaultValue("100") LimitArg limit, // Optional, default 100 - @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean - keepNonSpecific, // Optional, 
default false - @QueryParam("consolidate") ExpLevelConsolidationArg - consolidate // Optional, default everything is returned + @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean keepNonSpecific, // Optional, default false + @QueryParam("consolidate") ExpLevelConsolidationArg consolidate // Optional, default everything is returned ) { return respond( processedExpressionDataVectorService .getExpressionLevelsPca( datasetArgService.getEntities( datasets ), limit.getValueNoMaximum(), @@ -1190,16 +1186,13 @@ public ResponseDataObject> getDatase @Path("/{datasets}/expressions/differential") @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve the expression levels of a set of datasets subject to a threshold on their differential expressions") - public ResponseDataObject> getDatasetDifferentialExpression - ( // Params: + public ResponseDataObject> getDatasetDifferentialExpression( // Params: @PathParam("datasets") DatasetArrayArg datasets, // Required @QueryParam("diffExSet") Long diffExSet, // Required @QueryParam("threshold") @DefaultValue("1.0") Double threshold, // Optional, default 1.0 @QueryParam("limit") @DefaultValue("100") LimitArg limit, // Optional, default 100 - @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean - keepNonSpecific, // Optional, default false - @QueryParam("consolidate") ExpLevelConsolidationArg - consolidate // Optional, default everything is returned + @QueryParam("keepNonSpecific") @DefaultValue("false") Boolean keepNonSpecific, // Optional, default false + @QueryParam("consolidate") ExpLevelConsolidationArg consolidate // Optional, default everything is returned ) { if ( diffExSet == null ) { throw new BadRequestException( "The 'diffExSet' query parameter must be supplied." 
); From 9c88dc504097a2b75e8427e1e068548632bfe419 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 11:07:14 -0700 Subject: [PATCH 33/81] Add a limit argument and avoid the jointure with experimentAnalyzed --- .../diff/DifferentialExpressionResultDao.java | 3 ++- .../diff/DifferentialExpressionResultDaoImpl.java | 11 +++++------ .../diff/DifferentialExpressionResultService.java | 4 ++-- .../DifferentialExpressionResultServiceImpl.java | 4 ++-- .../diff/DifferentialExpressionResultDaoTest.java | 12 ++++++------ .../java/ubic/gemma/rest/DatasetsWebService.java | 8 ++++---- .../java/ubic/gemma/rest/DatasetsWebServiceTest.java | 4 ++-- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java index 3a00896d17..351c10ff63 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDao.java @@ -45,9 +45,10 @@ public interface DifferentialExpressionResultDao extends BaseDao findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap, double threshold ); + Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap, double threshold, int limit ); /** * Find differential expression for a gene in given data sets, exceeding a given significance level (using the diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index ba838d4394..38a8790fc6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -88,7 +88,7 @@ public void remove( DifferentialExpressionAnalysisResult entity ) { } @Override - public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap, double threshold ) { + public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, boolean includeSubsets, @Nullable Map sourceExperimentIdMap, double threshold, int limit ) { Assert.notNull( gene.getId(), "The gene must have a non-null ID." ); Assert.isTrue( threshold >= 0.0 && threshold <= 1.0, "Threshold must be in the [0, 1] interval." 
); if ( experimentAnalyzedIds.isEmpty() ) { @@ -123,17 +123,16 @@ public Map findByGeneAndExperimentAn bioAssaySetIds.addAll( subsetIds ); } Query query = getSessionFactory().getCurrentSession() - .createQuery( "select dear, e.id from DifferentialExpressionAnalysisResult dear " + .createQuery( "select dear, dea.experimentAnalyzed.id from DifferentialExpressionAnalysisResult dear " + "join fetch dear.contrasts cr " + "join dear.resultSet dears " + "join dears.analysis dea " - + "join dea.experimentAnalyzed e " - + "where dear.probe.id in :probeIds and e.id in :bioAssaySetIds and dear.correctedPvalue <= :threshold " + + "where dear.probe.id in :probeIds and dea.experimentAnalyzed.id in :bioAssaySetIds and dear.correctedPvalue <= :threshold " // if more than one probe is found, pick the one with the lowest corrected p-value - + "group by e order by dear.correctedPvalue" ) + + "group by dea.experimentAnalyzed order by dear.correctedPvalue" ) .setParameterList( "probeIds", optimizeParameterList( probeIds ) ) .setParameter( "threshold", threshold ); - List result = QueryUtils.listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048 ); + List result = QueryUtils.listByBatch( query, "bioAssaySetIds", bioAssaySetIds, 2048, limit ); Map rs = new HashMap<>(); for ( Object[] row : result ) { DifferentialExpressionAnalysisResult r = ( DifferentialExpressionAnalysisResult ) row[0]; diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java index d5ecb3b3e3..891d08129c 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultService.java @@ -39,10 +39,10 @@ public interface 
DifferentialExpressionResultService extends BaseReadOnlyService { /** - * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, Map, double) + * @see DifferentialExpressionResultDao#findByGeneAndExperimentAnalyzed(Gene, Collection, boolean, Map, double, int) */ @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY" }) - Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap, double threshold ); + Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map bioAssaySetIdMap, double threshold, int limit ); /** * Given a list of experiments and a threshold value finds all the probes that met the cut off in the given diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java index a3c38b8373..6ee6e476b7 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultServiceImpl.java @@ -49,8 +49,8 @@ public DifferentialExpressionResultServiceImpl( DifferentialExpressionResultDao @Override @Transactional(readOnly = true) - public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map sourceExperimentIdMap, double threshold ) { - return DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, sourceExperimentIdMap, threshold ); + public Map findByGeneAndExperimentAnalyzed( Gene gene, Collection experimentAnalyzedIds, Map sourceExperimentIdMap, double threshold, int limit ) { + return DERDao.findByGeneAndExperimentAnalyzed( gene, experimentAnalyzedIds, true, sourceExperimentIdMap, threshold, limit ); } @Override diff --git 
a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java index faf812076b..15940af641 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoTest.java @@ -52,13 +52,13 @@ public void testFindByGeneAndExperimentAnalyzedGroupingBySourceExperiment() { .setParameter( 1, cs.getId() ) .setParameter( 2, ad.getId() ) .executeUpdate(); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.0 ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.0 ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null, 1.0 ); - differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null, 1.0 ); - assertThatThrownBy( () -> differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.2 ) ) + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.0, -1 ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.0, -1 ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null, 1.0, -1 ); + differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), false, null, 1.0, -1 ); + assertThatThrownBy( () -> differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, 1.2, 
-1 ) ) .isInstanceOf( IllegalArgumentException.class ); - assertThatThrownBy( () -> differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, -1 ) ) + assertThatThrownBy( () -> differentialExpressionResultDao.findByGeneAndExperimentAnalyzed( gene, Collections.singleton( 1L ), true, null, -1, -1 ) ) .isInstanceOf( IllegalArgumentException.class ); } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 7a8903fd93..d56d0d48c0 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -754,7 +754,7 @@ public QueriedAndFilteredResponseDataObject filter, @QueryParam("threshold") @DefaultValue("1.0") Double threshold ) { - return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( null, geneArg, query, filter, threshold ); + return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( null, geneArg, query, filter, threshold, 2000 ); } /** @@ -772,10 +772,10 @@ public QueriedAndFilteredResponseDataObject filter, @QueryParam("threshold") @DefaultValue("1.0") Double threshold ) { - return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( taxonArg, geneArg, query, filter, threshold ); + return getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( taxonArg, geneArg, query, filter, threshold, 2000 ); } - private QueriedAndFilteredResponseDataObject getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( @Nullable TaxonArg taxonArg, GeneArg geneArg, QueryArg query, FilterArg filter, double threshold ) { + private QueriedAndFilteredResponseDataObject getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( @Nullable TaxonArg taxonArg, GeneArg geneArg, QueryArg query, FilterArg filter, double threshold, int limit ) { Gene gene; if ( taxonArg != null ) { Taxon taxon = 
taxonArgService.getEntity( taxonArg ); @@ -792,7 +792,7 @@ private QueriedAndFilteredResponseDataObject sourceExperimentIdMap = new HashMap<>(); - List payload = differentialExpressionResultService.findByGeneAndExperimentAnalyzed( gene, ids, sourceExperimentIdMap, threshold ).entrySet().stream() + List payload = differentialExpressionResultService.findByGeneAndExperimentAnalyzed( gene, ids, sourceExperimentIdMap, threshold, limit ).entrySet().stream() .map( e -> new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), sourceExperimentIdMap.get( e.getValue() ), e.getKey() ) ) .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .collect( Collectors.toList() ); diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index b0da96ef6a..2f4f28f6d4 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -568,7 +568,7 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGene() { .hasFieldOrPropertyWithValue( "sort.orderBy", "correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); - verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble() ); + verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble(), eq( 2000 ) ); } @Test @@ -587,7 +587,7 @@ public void testGetDatasetsDifferentialAnalysisResultsExpressionForGeneInTaxa() .hasFieldOrPropertyWithValue( "sort.orderBy", "correctedPvalue" ) .extracting( "groupBy", list( String.class ) ) .containsExactly( "sourceExperimentId", "experimentAnalyzedId" ); - verify( differentialExpressionResultService 
).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble() ); + verify( differentialExpressionResultService ).findByGeneAndExperimentAnalyzed( eq( brca1 ), any(), any(), anyDouble(), eq( 2000 ) ); } @Autowired From a2d1624cbe41ba87c589db640d1ceb167ed52f28 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 11:12:41 -0700 Subject: [PATCH 34/81] Include inferred terms and limit in payload --- .../main/java/ubic/gemma/rest/DatasetsWebService.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index d56d0d48c0..27f5c9e063 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -748,7 +748,7 @@ public Response getDatasetDifferentialExpressionAnalysesResultSets( @Path("/analyses/differential/results/gene/{gene}") @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve the differential expression results for a given gene") - public QueriedAndFilteredResponseDataObject getDatasetsDifferentialAnalysisResultsExpressionForGene( + public QueriedAndFilteredAndInferredAndLimitedResponseDataObject getDatasetsDifferentialAnalysisResultsExpressionForGene( @PathParam("gene") GeneArg geneArg, @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter, @@ -765,7 +765,7 @@ public QueriedAndFilteredResponseDataObject getDatasetsDifferentialAnalysisResultsExpressionForGeneInTaxa( + public QueriedAndFilteredAndInferredAndLimitedResponseDataObject getDatasetsDifferentialAnalysisResultsExpressionForGeneInTaxa( @PathParam("taxa") TaxonArg taxonArg, @PathParam("gene") GeneArg geneArg, @QueryParam("query") QueryArg query, @@ -775,7 +775,7 @@ public QueriedAndFilteredResponseDataObject getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( 
@Nullable TaxonArg taxonArg, GeneArg geneArg, QueryArg query, FilterArg filter, double threshold, int limit ) { + private QueriedAndFilteredAndInferredAndLimitedResponseDataObject getDatasetsDifferentialExpressionAnalysisResultsForGeneInternal( @Nullable TaxonArg taxonArg, GeneArg geneArg, QueryArg query, FilterArg filter, double threshold, int limit ) { Gene gene; if ( taxonArg != null ) { Taxon taxon = taxonArgService.getEntity( taxonArg ); @@ -783,7 +783,8 @@ private QueriedAndFilteredResponseDataObject inferredTerms = new HashSet<>(); + Filters filters = datasetArgService.getFilters( filter, null, inferredTerms ); if ( threshold < 0 || threshold > 1 ) { throw new BadRequestException( "The threshold must be in the [0, 1] interval." ); } @@ -796,7 +797,7 @@ private QueriedAndFilteredResponseDataObject new DifferentialExpressionAnalysisResultByGeneValueObject( e.getValue(), sourceExperimentIdMap.get( e.getValue() ), e.getKey() ) ) .sorted( Comparator.comparing( DifferentialExpressionAnalysisResultByGeneValueObject::getPValue, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .collect( Collectors.toList() ); - return Responders.all( payload, query != null ? query.getValue() : null, filters, new String[] { "sourceExperimentId", "experimentAnalyzedId" }, Sort.by( null, "correctedPvalue", Sort.Direction.ASC, "correctedPvalue" ) ); + return top( payload, query != null ? query.getValue() : null, filters, new String[] { "sourceExperimentId", "experimentAnalyzedId" }, Sort.by( null, "correctedPvalue", Sort.Direction.ASC, "correctedPvalue" ), 2000, inferredTerms ); } @Data From 456aca3ae5a4b33957ad7956b50d0d6f9ad84116 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 11:37:12 -0700 Subject: [PATCH 35/81] Initialize contrasts separately Using a join fetch is not efficient if the query is limited. 
--- .../expression/diff/DifferentialExpressionResultDaoImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index 38a8790fc6..00264e042d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -124,7 +124,6 @@ public Map findByGeneAndExperimentAn } Query query = getSessionFactory().getCurrentSession() .createQuery( "select dear, dea.experimentAnalyzed.id from DifferentialExpressionAnalysisResult dear " - + "join fetch dear.contrasts cr " + "join dear.resultSet dears " + "join dears.analysis dea " + "where dear.probe.id in :probeIds and dea.experimentAnalyzed.id in :bioAssaySetIds and dear.correctedPvalue <= :threshold " @@ -136,6 +135,7 @@ public Map findByGeneAndExperimentAn Map rs = new HashMap<>(); for ( Object[] row : result ) { DifferentialExpressionAnalysisResult r = ( DifferentialExpressionAnalysisResult ) row[0]; + Hibernate.initialize( r.getContrasts() ); Long bioAssaySetId = ( Long ) row[1]; rs.put( bioAssaySetId, r ); if ( sourceExperimentIdMap != null ) { From 4b35fefc4909a3b66d131fed0beef76920e7bc81 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 12:23:15 -0700 Subject: [PATCH 36/81] Improve loading of result sets and sort results by P-value --- .../ExpressionAnalysisResultSetDaoImpl.java | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/ExpressionAnalysisResultSetDaoImpl.java 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/ExpressionAnalysisResultSetDaoImpl.java index 3112fe13a6..d30d1b68ff 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/ExpressionAnalysisResultSetDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/ExpressionAnalysisResultSetDaoImpl.java @@ -75,21 +75,23 @@ public void remove( ExpressionAnalysisResultSet entity ) { @Override public ExpressionAnalysisResultSet loadWithResultsAndContrasts( Long id ) { StopWatch timer = StopWatch.createStarted(); - ExpressionAnalysisResultSet ears = ( ExpressionAnalysisResultSet ) getSessionFactory().getCurrentSession() - .createQuery( "select ears from ExpressionAnalysisResultSet ears " - + "left join fetch ears.results res " - + "left join fetch res.contrasts " - + "where ears.id = :rsId" ) - .setParameter( "rsId", id ) - .uniqueResult(); + ExpressionAnalysisResultSet ears = load( id ); if ( ears != null ) { - for ( DifferentialExpressionAnalysisResult r : ears.getResults() ) { - // will also initialize the biological characteristics and sequence database entries - // this is efficient because of batch loading and second-level caching + //noinspection unchecked + List results = ( List ) getSessionFactory().getCurrentSession() + .createQuery( "select res from DifferentialExpressionAnalysisResult res " + + "where res.resultSet = :ears " + + "order by res.correctedPvalue" ) + .setParameter( "ears", ears ) + .list(); + for ( DifferentialExpressionAnalysisResult r : results ) { Hibernate.initialize( r.getProbe() ); + Hibernate.initialize( r.getContrasts() ); } + // preserve order of results + ears.setResults( new LinkedHashSet<>( results ) ); } - if ( timer.getTime() > 1000 ) { + if ( timer.getTime() > 5000 ) { log.info( String.format( "Loaded [%s id=%d] with results, probes and contrasts in %d ms.", elementClass.getName(), id, timer.getTime() ) ); } @@ -115,14 
+117,12 @@ public ExpressionAnalysisResultSet loadWithResultsAndContrasts( Long id, int off .list(); for ( DifferentialExpressionAnalysisResult r : results ) { Hibernate.initialize( r.getProbe() ); - } - for ( DifferentialExpressionAnalysisResult r : results ) { Hibernate.initialize( r.getContrasts() ); } // preserve order of results ears.setResults( new LinkedHashSet<>( results ) ); } - if ( timer.getTime() > 1000 ) { + if ( timer.getTime() > 100 ) { log.info( String.format( "Loaded [%s id=%d] with results, probes and contrasts in %d ms.", elementClass.getName(), id, timer.getTime() ) ); } @@ -150,14 +150,12 @@ public ExpressionAnalysisResultSet loadWithResultsAndContrasts( Long id, double .list(); for ( DifferentialExpressionAnalysisResult r : results ) { Hibernate.initialize( r.getProbe() ); - } - for ( DifferentialExpressionAnalysisResult r : results ) { Hibernate.initialize( r.getContrasts() ); } // preserve order of results ears.setResults( new LinkedHashSet<>( results ) ); } - if ( timer.getTime() > 1000 ) { + if ( timer.getTime() > 100 ) { log.info( String.format( "Loaded [%s id=%d] with results, probes and contrasts in %d ms.", elementClass.getName(), id, timer.getTime() ) ); } From 92cef5e65b4c5fe118bc5489066bbfbdd11a971a Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 13:48:58 -0700 Subject: [PATCH 37/81] Improve tabular serialization of continuous factors --- ...ssionAnalysisResultSetFileServiceImpl.java | 92 ++++++++---- .../analysis/expression/diff/Contrast.java | 140 ++++++++++++++++++ .../expression/diff/ContrastTest.java | 48 ++++++ 3 files changed, 252 insertions(+), 28 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/Contrast.java create mode 100644 gemma-core/src/test/java/ubic/gemma/model/analysis/expression/diff/ContrastTest.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionAnalysisResultSetFileServiceImpl.java 
b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionAnalysisResultSetFileServiceImpl.java index 48e6c79c49..8a53f5dcb1 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionAnalysisResultSetFileServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionAnalysisResultSetFileServiceImpl.java @@ -2,17 +2,20 @@ import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.csv.CSVPrinter; -import org.apache.commons.lang3.tuple.Pair; import org.springframework.stereotype.Service; +import org.springframework.util.Assert; +import ubic.gemma.model.analysis.expression.diff.Contrast; import ubic.gemma.model.analysis.expression.diff.ContrastResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.measurement.Measurement; import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.FactorType; import ubic.gemma.model.expression.experiment.FactorValue; import ubic.gemma.model.genome.Gene; +import javax.annotation.Nullable; import java.io.IOException; import java.io.Writer; import java.util.*; @@ -33,29 +36,45 @@ public void writeTsvToAppendable( ExpressionAnalysisResultSet analysisResultSet, // add the basic columns List header = new ArrayList<>( Arrays.asList( "id", "probe_id", "probe_name", "gene_id", "gene_name", "gene_ncbi_id", "gene_official_symbol", "gene_official_name", "pvalue", "corrected_pvalue", "rank" ) ); + // for continuous + Set factorsIfContinuous = analysisResultSet.getExperimentalFactors().stream() + .filter( ef -> ef.getType().equals( FactorType.CONTINUOUS ) ) + .collect( Collectors.toSet() ); + ExperimentalFactor factorIfContinuous; + if ( factorsIfContinuous.isEmpty() ) { + factorIfContinuous = null; + } else if ( 
factorsIfContinuous.size() == 1 ) { + factorIfContinuous = factorsIfContinuous.iterator().next(); + } else { + throw new UnsupportedOperationException( "Result sets with more than one continuous factor are not supported." ); + } + // this is the order the factor values are displayed - Comparator contrastResultComparator = Comparator - .comparing( ContrastResult::getFactorValue, Comparator.nullsLast( Comparator.comparing( FactorValue::getId ) ) ) - .thenComparing( ContrastResult::getSecondFactorValue, Comparator.nullsLast( Comparator.comparing( FactorValue::getId ) ) ); + // this is only relevant for interactions + Comparator contrastResultComparator = Comparator + .comparing( Contrast::getFactorValue, Comparator.nullsLast( Comparator.comparing( FactorValue::getId ) ) ) + .thenComparing( Contrast::getSecondFactorValue, Comparator.nullsLast( Comparator.comparing( FactorValue::getId ) ) ); // we need to peek in the contrast result to understand factor value interactions // i.e. interaction between genotype and time point might result in a contrast_male_3h column, although we would // use factor value IDs in the actual column name which might result in something like contrast_1292_2938 - final List firstContrastResults = analysisResultSet.getResults().stream() - .findFirst() - .map( DifferentialExpressionAnalysisResult::getContrasts ) - .orElse( Collections.emptySet() ) - .stream().sorted( contrastResultComparator ) - .collect( Collectors.toList() ); - - for ( ContrastResult contrastResult : firstContrastResults ) { - String contrastResultPrefix = "contrast" - + ( contrastResult.getFactorValue() != null ? "_" + contrastResult.getFactorValue().getId() : "" ) - + ( contrastResult.getSecondFactorValue() != null ? 
"_" + contrastResult.getSecondFactorValue().getId() : "" ); + LinkedHashSet allContrasts = analysisResultSet.getResults().stream() + .flatMap( r -> r.getContrasts().stream() ) + .map( c -> contrastFromResult( c, factorIfContinuous ) ) + .sorted( contrastResultComparator ) + .collect( Collectors.toCollection( LinkedHashSet::new ) ); + + for ( Contrast contrast : allContrasts ) { + StringBuilder contrastResultPrefix = new StringBuilder( "contrast_" ); + // this could be empty for a continuous factor, in which case it will be serialized as contrast_log2fc, + // contrast_tstat, etc... + for ( FactorValue fv : contrast.getFactorValues() ) { + contrastResultPrefix.append( fv.getId() ).append( "_" ); + } header.addAll( Arrays.asList( - contrastResultPrefix + "_log2fc", - contrastResultPrefix + "_tstat", - contrastResultPrefix + "_pvalue" ) ); + contrastResultPrefix + "log2fc", + contrastResultPrefix + "tstat", + contrastResultPrefix + "pvalue" ) ); } try ( CSVPrinter printer = getTsvFormatBuilder( "Experimental factors: " + experimentalFactorsMetadata ) @@ -75,21 +94,19 @@ public void writeTsvToAppendable( ExpressionAnalysisResultSet analysisResultSet, format( analysisResult.getPvalue() ), format( analysisResult.getCorrectedPvalue() ), format( analysisResult.getRank() ) ) ); - Map, ContrastResult> contrastsByFirstAndSecondFactorValue = analysisResult.getContrasts().stream() - .collect( Collectors.toMap( fv -> Pair.of( fv.getFactorValue(), fv.getSecondFactorValue() ), identity() ) ); + Map contrastResultMap = analysisResult.getContrasts().stream() + .collect( Collectors.toMap( cr -> contrastFromResult( cr, factorIfContinuous ), identity() ) ); // render contrast results in the same order than the first row and handle possibly missing columns - for ( ContrastResult contrastResult : firstContrastResults ) { - ContrastResult cr = contrastsByFirstAndSecondFactorValue.get( Pair.of( contrastResult.getFactorValue(), contrastResult.getSecondFactorValue() ) ); + for ( Contrast 
contrast : allContrasts ) { + ContrastResult cr = contrastResultMap.get( contrast ); if ( cr != null ) { record.add( format( cr.getLogFoldChange() ) ); record.add( format( cr.getTstat() ) ); record.add( format( cr.getPvalue() ) ); } else { - log.warn( String.format( "%s is missing contrast result for [%s, %s]. The corresponding column in the TSV will be treated as NaN.", - analysisResult, contrastResult.getFactorValue(), contrastResult.getSecondFactorValue() ) ); - record.add( "" ); - record.add( "" ); - record.add( "" ); + record.add( format( Double.NaN ) ); + record.add( format( Double.NaN ) ); + record.add( format( Double.NaN ) ); } } printer.printRecord( record ); @@ -97,6 +114,23 @@ public void writeTsvToAppendable( ExpressionAnalysisResultSet analysisResultSet, } } + /** + * Create a contrast from a {@link ContrastResult}. + * @param factorIfContinuous a factor to use if the contrast is continuous + */ + private Contrast contrastFromResult( ContrastResult cr, @Nullable ExperimentalFactor factorIfContinuous ) { + if ( cr.getSecondFactorValue() != null ) { + Assert.notNull( cr.getFactorValue(), "There must be a first factor value if a second factor value is present." ); + return Contrast.interaction( cr.getFactorValue(), cr.getSecondFactorValue() ); + } else if ( cr.getFactorValue() != null ) { + return Contrast.categorical( cr.getFactorValue() ); + } else if ( factorIfContinuous != null ) { + return Contrast.continuous( factorIfContinuous ); + } else { + throw new IllegalArgumentException( "A factor must be provided for a continuous contrast." 
); + } + } + private String formatExperimentalFactor( ExperimentalFactor experimentalFactor ) { return "name: " + experimentalFactor.getName() + ", values: [" + experimentalFactor.getFactorValues() @@ -107,9 +141,11 @@ private String formatExperimentalFactor( ExperimentalFactor experimentalFactor ) private String formatFactorValue( FactorValue factorValue ) { + if ( factorValue.getMeasurement() != null ) { + return formatMeasurement( factorValue.getMeasurement() ); + } return "id: " + factorValue.getId() + ( factorValue.getIsBaseline() != null && factorValue.getIsBaseline() ? "*" : "" ) - + ( factorValue.getMeasurement() != null ? ", measurement: " + formatMeasurement( factorValue.getMeasurement() ) : "" ) + ", characteristics: [" + formatCharacteristics( factorValue.getCharacteristics() ) + "]"; } diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/Contrast.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/Contrast.java new file mode 100644 index 0000000000..9465cab42a --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/expression/diff/Contrast.java @@ -0,0 +1,140 @@ +package ubic.gemma.model.analysis.expression.diff; + +import org.springframework.util.Assert; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.FactorType; +import ubic.gemma.model.expression.experiment.FactorValue; + +import javax.annotation.Nullable; +import javax.annotation.ParametersAreNonnullByDefault; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +/** + * Represents a contrast. + * @author poirigui + */ +@ParametersAreNonnullByDefault +public class Contrast { + + /** + * Create a contrast for a continuous factor. 
+ */ + public static Contrast continuous( ExperimentalFactor ef ) { + Assert.isTrue( ef.getType().equals( FactorType.CONTINUOUS ) ); + return new Contrast( ef ); + } + + /** + * Create a contrast for a categorical factor. + */ + public static Contrast categorical( FactorValue fv ) { + Assert.isTrue( fv.getExperimentalFactor().getType().equals( FactorType.CATEGORICAL ) ); + return new Contrast( fv ); + } + + /** + * Create an interaction of two categorical factors. + */ + public static Contrast interaction( FactorValue fv1, FactorValue fv2 ) { + Assert.isTrue( fv1.getExperimentalFactor().getType().equals( FactorType.CATEGORICAL ) ); + Assert.isTrue( fv2.getExperimentalFactor().getType().equals( FactorType.CATEGORICAL ) ); + Assert.isTrue( !fv1.getExperimentalFactor().equals( fv2.getExperimentalFactor() ), + "An interaction must be of two different experimental factors." ); + return new Contrast( fv1, fv2 ); + } + + /** + * Necessary for continuous factors because they lack specific FVs. + */ + private final ExperimentalFactor experimentalFactor; + + @Nullable + private final FactorValue factorValue; + @Nullable + private final FactorValue secondFactorValue; + + private final List factorValues; + + private Contrast( ExperimentalFactor experimentalFactor ) { + this.experimentalFactor = experimentalFactor; + this.factorValue = null; + this.secondFactorValue = null; + this.factorValues = Collections.emptyList(); + } + + private Contrast( FactorValue fv ) { + this.experimentalFactor = null; + this.factorValue = fv; + this.secondFactorValue = null; + factorValues = Collections.singletonList( fv ); + } + + private Contrast( FactorValue fv, FactorValue fv2 ) { + this.experimentalFactor = null; + this.factorValue = fv; + this.secondFactorValue = fv2; + factorValues = Arrays.asList( fv, fv2 ); + } + + public ExperimentalFactor getExperimentalFactor() { + return experimentalFactor; + } + + @Nullable + public FactorValue getFactorValue() { + return factorValue; + } + + 
@Nullable + public FactorValue getSecondFactorValue() { + return secondFactorValue; + } + + public List getFactorValues() { + return factorValues; + } + + /** + * Indicate if this contrast is continuous. + */ + public boolean isContinuous() { + return factorValue == null; + } + + /** + * Indicate if this contrast is an interaction of two or more factors. + */ + public boolean isInteraction() { + return secondFactorValue != null; + } + + @Override + public boolean equals( Object obj ) { + if ( obj == this ) { + return true; + } + if ( !( obj instanceof Contrast ) ) { + return false; + } + Contrast that = ( Contrast ) obj; + return Objects.equals( experimentalFactor, that.experimentalFactor ) + && Objects.equals( factorValue, that.factorValue ) + && Objects.equals( secondFactorValue, that.secondFactorValue ); + } + + @Override + public int hashCode() { + return Objects.hash( factorValue, secondFactorValue ); + } + + @Override + public String toString() { + return "Contrast for " + + ( factorValue != null ? factorValue : "[continuous]" ) + + ( secondFactorValue != null ? 
":" + secondFactorValue : "" ); + } +} + diff --git a/gemma-core/src/test/java/ubic/gemma/model/analysis/expression/diff/ContrastTest.java b/gemma-core/src/test/java/ubic/gemma/model/analysis/expression/diff/ContrastTest.java new file mode 100644 index 0000000000..cc0434f937 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/model/analysis/expression/diff/ContrastTest.java @@ -0,0 +1,48 @@ +package ubic.gemma.model.analysis.expression.diff; + +import org.junit.Test; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.FactorType; +import ubic.gemma.model.expression.experiment.FactorValue; + +import static org.junit.Assert.*; + +public class ContrastTest { + + @Test + public void test() { + ExperimentalFactor ef = new ExperimentalFactor(); + ef.setType( FactorType.CATEGORICAL ); + FactorValue fv1 = new FactorValue(); + fv1.setExperimentalFactor( ef ); + fv1.setId( 1L ); + assertEquals( Contrast.categorical( fv1 ), Contrast.categorical( fv1 ) ); + assertFalse( Contrast.categorical( fv1 ).isInteraction() ); + assertFalse( Contrast.categorical( fv1 ).isContinuous() ); + } + + + @Test + public void testInteraction() { + ExperimentalFactor ef = new ExperimentalFactor(); + ef.setType( FactorType.CATEGORICAL ); + FactorValue fv1 = new FactorValue(); + fv1.setExperimentalFactor( ef ); + fv1.setId( 1L ); + ExperimentalFactor ef2 = new ExperimentalFactor(); + ef2.setType( FactorType.CATEGORICAL ); + FactorValue fv2 = new FactorValue(); + fv2.setId( 2L ); + fv2.setExperimentalFactor( ef2 ); + assertEquals( Contrast.categorical( fv1 ), Contrast.categorical( fv1 ) ); + assertFalse( Contrast.categorical( fv1 ).isInteraction() ); + assertFalse( Contrast.categorical( fv1 ).isContinuous() ); + } + + @Test + public void testContinuous() { + ExperimentalFactor ef = new ExperimentalFactor(); + ef.setType( FactorType.CONTINUOUS ); + assertTrue( Contrast.continuous( ef ).isContinuous() ); + } +} \ No newline at end of file 
From 6cf71643889f22bac7fe8444ad42f981e44a9d94 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 14:45:49 -0700 Subject: [PATCH 38/81] Revert "Treat BatchInformationMissingEvent as no batch info" This reverts commit 37637992360dddfc7d161b9691053d68cd692b00. --- .../src/main/java/ubic/gemma/rest/DatasetsWebService.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 27f5c9e063..96e3589944 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -49,9 +49,7 @@ import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResultValueObject; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; -import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationMissingEvent; import ubic.gemma.model.common.description.AnnotationValueObject; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.CharacteristicValueObject; @@ -1024,8 +1022,7 @@ public ResponseDataObject getDatasetHasBatchInformation( // Params: @PathParam("dataset") DatasetArg datasetArg // Required ) { ExpressionExperiment ee = datasetArgService.getEntity( datasetArg ); - AuditEvent lastBatchInfoEvent = auditEventService.getLastEvent( ee, BatchInformationFetchingEvent.class ); - return respond( lastBatchInfoEvent == null || lastBatchInfoEvent.getEventType() instanceof BatchInformationMissingEvent ); + return respond( this.auditEventService.hasEvent( ee, 
BatchInformationFetchingEvent.class ) ); } /** From 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 15:26:57 -0700 Subject: [PATCH 39/81] Don't produce a FailedBatchInformationFetchingEvent when batch info is missing --- .../preprocess/PreprocessingException.java | 6 +- .../BatchInfoMissingException.java | 17 ++++ .../batcheffects/BatchInfoParser.java | 2 +- .../BatchInfoPopulationException.java | 4 + .../BatchInfoPopulationServiceImpl.java | 85 ++++++++----------- 5 files changed, 62 insertions(+), 52 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoMissingException.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java index ee599ed137..85d8c658ce 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java @@ -19,7 +19,7 @@ /** * Allows us to catch preprocessing errors and handle them correctly. - * + *

* The main kind of preprocessing exceptions are {@link ubic.gemma.core.analysis.preprocess.filter.FilteringException} * and {@link ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationException}. * @@ -33,6 +33,10 @@ public PreprocessingException( ExpressionExperiment ee, String message ) { super( String.format( "Failed to pre-process %s: %s", ee.getShortName(), message ) ); } + public PreprocessingException( ExpressionExperiment ee, String message, Throwable cause ) { + super( String.format( "Failed to pre-process %s: %s", ee.getShortName(), message ), cause ); + } + public PreprocessingException( ExpressionExperiment ee, Throwable cause ) { super( String.format( "Failed to pre-process %s: %s", ee.getShortName(), ExceptionUtils.getRootCauseMessage( cause ) ), cause ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoMissingException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoMissingException.java new file mode 100644 index 0000000000..a151a0639a --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoMissingException.java @@ -0,0 +1,17 @@ +package ubic.gemma.core.analysis.preprocess.batcheffects; + +import ubic.gemma.model.expression.experiment.ExpressionExperiment; + +/** + * Indicate that batch information is missing. 
+ */ +public class BatchInfoMissingException extends BatchInfoPopulationException { + + public BatchInfoMissingException( ExpressionExperiment ee, String message ) { + super( ee, message ); + } + + public BatchInfoMissingException( ExpressionExperiment ee, String message, Throwable cause ) { + super( ee, message, cause ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoParser.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoParser.java index 917985ff9c..f5c0e4895e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoParser.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoParser.java @@ -90,7 +90,7 @@ public Map getBatchInfo( ExpressionExperiment ee, Collection< } } } - throw new BatchInfoPopulationException(ee, + throw new BatchInfoMissingException( ee, "Did not get enough raw files :got " + bioAssays2Files.size() + ", expected " + assayAccessions .size() + " while processing " + ee.getShortName() ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationException.java index 0af559758a..43f15cd509 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationException.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationException.java @@ -32,4 +32,8 @@ public BatchInfoPopulationException( ExpressionExperiment ee, String message ) { public BatchInfoPopulationException( ExpressionExperiment ee, Throwable cause ) { super( ee, cause ); } + + public BatchInfoPopulationException( ExpressionExperiment ee, String message, Throwable cause ) { + super( ee, message, cause ); + } } diff --git 
a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java index 5ca8995690..f3d5987624 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java @@ -20,16 +20,20 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; -import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; +import ubic.gemma.core.config.Settings; import ubic.gemma.core.loader.expression.geo.fetcher.RawDataFetcher; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.*; +import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationMissingEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.FailedBatchInformationFetchingEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.SingleBatchDeterminationEvent; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.common.description.LocalFile; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.experiment.ExperimentalDesign; +import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; @@ 
-38,7 +42,6 @@ import ubic.gemma.persistence.service.expression.experiment.ExperimentalFactorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.EntityUtils; -import ubic.gemma.core.config.Settings; import java.io.BufferedReader; import java.io.File; @@ -120,25 +123,23 @@ public void fillBatchInformation( ExpressionExperiment ee, boolean force ) throw Collection files = null; try { if ( isRNASeq ) { - this.getBatchDataFromFASTQHeaders( ee ); - return; - } - - files = this.fetchRawDataFiles( ee ); - if ( files == null || files.isEmpty() ) { - this.auditTrailService - .addUpdateEvent( ee, BatchInformationMissingEvent.class, "No files were found" ); - throw new BatchInfoPopulationException( ee, "No file were found." ); + this.createBatchFactorFromFASTQHeaders( ee ); + } else { + // microarray case + files = this.fetchRawDataFiles( ee ); + if ( files == null || files.isEmpty() ) { + throw new BatchInfoMissingException( ee, "No file were found." ); + } + this.getBatchDataFromRawFiles( ee, files ); } - this.getBatchDataFromRawFiles( ee, files ); - + } catch ( BatchInfoMissingException e ) { + this.auditTrailService.addUpdateEvent( ee, BatchInformationMissingEvent.class, e.getMessage(), e ); + throw e; } catch ( Exception e ) { - - if ( BatchInfoPopulationException.class.isAssignableFrom( e.getClass() ) ) { - throw ( BatchInfoPopulationException ) e; - } - this.auditTrailService.addUpdateEvent( ee, FailedBatchInformationFetchingEvent.class, e.getMessage(), e ); + if ( e instanceof BatchInfoPopulationException ) { + throw e; + } throw new BatchInfoPopulationException( ee, e ); } finally { if ( BatchInfoPopulationServiceImpl.CLEAN_UP && files != null ) { @@ -199,30 +200,26 @@ private Collection fetchRawDataFiles( ExpressionExperiment ee ) { /** * Look for batch information and create a Factor for batch if there is more than one batch. 
- * - * @throws IOException if there was a problem reading the FASTQ headers */ - private void getBatchDataFromFASTQHeaders( ExpressionExperiment ee ) throws IOException { - + private void createBatchFactorFromFASTQHeaders( ExpressionExperiment ee ) { // Read and store header data. - - Map headers; + // map of sample ID to raw headers + Map rawHeaders; try { - headers = getFastqHeaders( ee ); + rawHeaders = readFastqHeaders( ee ); } catch ( IOException e ) { - this.auditTrailService - .addUpdateEvent( ee, BatchInformationMissingEvent.class, "Failed to locate FASTQ header information", e.getMessage() ); - throw new IOException( "Error while processing FASTQ headers for " + ee + ": " + e.getMessage(), e ); + throw new BatchInfoMissingException( ee, "Failed to locate FASTQ header information", e ); } - if ( headers == null || headers.isEmpty() ) { - this.auditTrailService - .addUpdateEvent( ee, BatchInformationMissingEvent.class, "No FASTQ headers found", "" ); - throw new IOException( "No FASTQ headers found for " + ee ); + if ( rawHeaders == null || rawHeaders.isEmpty() ) { + throw new BatchInfoMissingException( ee, "FASTQ header file was empty." ); } + Map headers = assignRawHeadersToSamples( ee, rawHeaders ); + // Create batch factor. 
this.removeExistingBatchFactor( ee ); + ExperimentalFactor bf = batchInfoPopulationHelperService.createRnaSeqBatchFactor( ee, headers ); if ( bf != null ) { @@ -252,22 +249,14 @@ private void getBatchDataFromRawFiles( ExpressionExperiment ee, Collection dates = null; - try { - dates = batchInfoParser.getBatchInfo( ee, files ); - } catch ( BatchInfoPopulationException e ) { - BatchInfoPopulationServiceImpl.log - .info( "No batch informatino for: " + ee.getShortName() ); - this.auditTrailService.addUpdateEvent( ee, BatchInformationMissingEvent.class, e.getMessage(), e ); - throw e; - } + Map dates = batchInfoParser.getBatchInfo( ee, files ); this.removeExistingBatchFactor( ee ); ExperimentalFactor factor = batchInfoPopulationHelperService.createBatchFactor( ee, dates ); // we don't make a batch factor if there is just one batch. - int numberOfBatches = factor == null || factor.getFactorValues().size() == 0 ? 1 : factor.getFactorValues().size(); + int numberOfBatches = factor == null || factor.getFactorValues().isEmpty() ? 1 : factor.getFactorValues().size(); List allDates = new ArrayList<>( dates.values() ); Collections.sort( allDates ); @@ -289,18 +278,14 @@ private void getBatchDataFromRawFiles( ExpressionExperiment ee, Collection getFastqHeaders( ExpressionExperiment ee ) throws IOException { + private Map assignRawHeadersToSamples( ExpressionExperiment ee, Map rawHeaders ) { Map headers = new HashMap<>(); - // map of sample ID to raw headers - Map rawHeaders = readFastqHeaders( ee ); - if ( rawHeaders == null || rawHeaders.isEmpty() ) return null; for ( BioAssay ba : ee.getBioAssays() ) { @@ -327,7 +312,7 @@ private Map getFastqHeaders( ExpressionExperiment ee ) thro } - // Note: for microarray processing dates, we persist in the Biomaterialservice.associateBatchFactor. + // Note: for microarray processing dates, we persist in the Biomaterialservice.associateBatchFactor. 
// The difference for RNAseq is that we want to store the entire header, which includes parts that are not needed for the batch information. bioAssayService.update( ba ); From 2dc614e7fb8305cdf0dc1d0124be834059d22a53 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 15:57:32 -0700 Subject: [PATCH 40/81] Fix option parsing logic for BlacklistCli --- .../ubic/gemma/core/apps/BlacklistCli.java | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java index a2a5544e29..d89b9d3f52 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java @@ -28,14 +28,14 @@ import ubic.gemma.core.loader.expression.geo.service.GeoBrowser; import ubic.gemma.core.util.AbstractAuthenticatedCLI; import ubic.gemma.core.util.AbstractCLI; -import ubic.gemma.model.common.description.DatabaseEntry; -import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.blacklist.BlacklistedEntity; -import ubic.gemma.model.blacklist.BlacklistedPlatform; import ubic.gemma.model.blacklist.BlacklistedExperiment; +import ubic.gemma.model.blacklist.BlacklistedPlatform; +import ubic.gemma.model.common.description.DatabaseEntry; +import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.persistence.service.common.description.ExternalDatabaseService; import ubic.gemma.persistence.service.blacklist.BlacklistedEntityService; +import ubic.gemma.persistence.service.common.description.ExternalDatabaseService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import java.io.BufferedReader; @@ -54,12 +54,20 @@ public class BlacklistCli extends AbstractAuthenticatedCLI { private static final int 
MAX_RETRIES = 3; - String fileName = null; + + // accessions of file containing accessions to blacklist + private String accession = null; + private String fileName = null; + + // reason (if adding to blacklist) + private String reason = null; + + // remove from blacklist private boolean remove = false; + + // proactive mode private boolean proactive = false; private Collection platformsToScreen; - private String reason = null; - private String accession = null; @Override public CommandGroup getCommandGroup() { @@ -124,7 +132,7 @@ protected void doWork() throws Exception { return; } - if (reason.isEmpty()) { + if ( reason.isEmpty() ) { throw new IllegalArgumentException( "A reason for blacklisting must be provided for " + accession ); } @@ -356,46 +364,38 @@ private int fetchAndBlacklist( ExternalDatabase geo, GeoBrowser gbs, Blacklisted @Override protected void processOptions( CommandLine commandLine ) { - if ( commandLine.hasOption( "accession" ) ) { - if ( !commandLine.hasOption( "reason" ) && !commandLine.hasOption( "undo" ) ) { - throw new IllegalArgumentException( "Must provide a reason for blacklisting (unless using -sundo)" ); + if ( commandLine.hasOption( "pp" ) || commandLine.hasOption( "file" ) ) { + throw new IllegalArgumentException( "The -accession option cannot be combined with -pp or -file" ); } - - if ( commandLine.hasOption( "file" ) ) { - throw new IllegalArgumentException( "The accession option cannot be combined with the file option" ); - + if ( !commandLine.hasOption( "undo" ) && StringUtils.isBlank( commandLine.getOptionValue( "reason" ) ) ) { + throw new IllegalArgumentException( "Must provide a reason for blacklisting (unless using -undo)" ); } - this.accession = commandLine.getOptionValue( "accession" ); this.reason = commandLine.getOptionValue( "reason" ); - return; - } - - - if ( commandLine.hasOption( "pp" ) ) { - if ( this.remove || this.fileName != null ) { + this.remove = commandLine.hasOption( "undo" ); + } else if ( 
commandLine.hasOption( "pp" ) ) { + if ( commandLine.hasOption( "accession" ) || commandLine.hasOption( "file" ) ) { throw new IllegalArgumentException( "The pp option cannot be combined with others" ); } + if ( commandLine.hasOption( "undo" ) ) { + throw new IllegalArgumentException( "The -pp option cannot be compiled with -undo." ); + } this.proactive = true; - if ( commandLine.hasOption( "a" ) ) { this.platformsToScreen = Arrays.asList( StringUtils.split( commandLine.getOptionValue( "a" ) ) ); } - - return; - } - - if ( commandLine.hasOption( "file" ) ) { + } else if ( commandLine.hasOption( "file" ) ) { + if ( commandLine.hasOption( "accession" ) || commandLine.hasOption( "pp" ) ) { + throw new IllegalArgumentException( "The -file option cannot be combined with -pp or -accession." ); + } + if ( commandLine.hasOption( "reason" ) ) { + throw new IllegalArgumentException( "The -file option cannot be combined with -reason, use the second column to specify blacklisting reason." ); + } this.fileName = commandLine.getOptionValue( "file" ); + this.remove = commandLine.hasOption( "undo" ); } else { - throw new IllegalArgumentException( "Must provide an input file" ); + throw new IllegalArgumentException( "Must provide one of -accession, -pp or -file option." 
); } - - if ( commandLine.hasOption( "undo" ) ) { - this.remove = true; - } - } - } From 5b9454a2375817df6c24962b8ff29acf67e1a5ad Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Wed, 19 Jun 2024 17:02:52 -0700 Subject: [PATCH 41/81] remove trailing space Probably our configuration parser should be stripping inputs --- gemma-core/src/main/resources/project.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma-core/src/main/resources/project.properties b/gemma-core/src/main/resources/project.properties index 0b2eea7a11..c352bf2ed3 100644 --- a/gemma-core/src/main/resources/project.properties +++ b/gemma-core/src/main/resources/project.properties @@ -53,7 +53,7 @@ smd.host=smd-ftp.stanford.edu geo.local.datafile.basepath=${gemma.download.path}/arraydata/GEO geo.host=ftp.ncbi.nih.gov geo.remote.seriesDir=geo/series/ -geo.remote.datasetDir=geo/datasets/ +geo.remote.datasetDir=geo/datasets/ geo.remote.rawDataDir=geo/series/ geo.remote.platformDir=geo/platforms/ # data sets with fewer expression samples than this will be rejected. From b010dd2e3f689aeb56cd838d01d7189f8aa36ade Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 17:03:30 -0700 Subject: [PATCH 42/81] Check if the latest event is a BatchInformationFetchingEvent for getDatasetHasBatchInformation() This allows a BatchInformationMissingEvent to take precedence. 
--- .../src/main/java/ubic/gemma/rest/DatasetsWebService.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 96e3589944..7a05aca3a1 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -49,6 +49,8 @@ import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResultValueObject; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; +import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; import ubic.gemma.model.common.description.AnnotationValueObject; import ubic.gemma.model.common.description.Characteristic; @@ -1022,7 +1024,10 @@ public ResponseDataObject getDatasetHasBatchInformation( // Params: @PathParam("dataset") DatasetArg datasetArg // Required ) { ExpressionExperiment ee = datasetArgService.getEntity( datasetArg ); - return respond( this.auditEventService.hasEvent( ee, BatchInformationFetchingEvent.class ) ); + // BatchInformationEvent can either be BatchInformationFetchingEvent or BatchInformationMissingEvent, we + // consider the class of the latest one + AuditEvent event = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + return respond( event != null && event.getEventType() instanceof BatchInformationFetchingEvent ); } /** From 7671aea4b99201a796ba365bd779ba9da30cb386 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 21:24:35 -0700 Subject: [PATCH 43/81] Apply the last event logic to checkHasBatchInfo() and 
checkBatchInfoStatus() --- .../ExpressionExperimentService.java | 13 +++- .../ExpressionExperimentServiceImpl.java | 62 ++++++++---------- .../ExpressionExperimentServiceTest.java | 65 ++++++++++++++++++- .../ubic/gemma/rest/DatasetsWebService.java | 10 +-- 4 files changed, 101 insertions(+), 49 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 18b3284efa..4953e833d6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -152,11 +152,20 @@ public interface ExpressionExperimentService @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_COLLECTION_READ" }) List browse( int start, int limit ); + /** + * Check if the given experiment has batch information. + *

+ * This does not imply that the batch information is usable or valid. Use {@link #checkBatchFetchStatus(ExpressionExperiment)} + * to get more details about the state of batch information. + */ + boolean checkHasBatchInfo( ExpressionExperiment ee ); + + /** + * Retrieve a batch information event that summarizes the state of batch information. + */ @Nullable BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ); - boolean checkHasBatchInfo( ExpressionExperiment ee ); - /** * returns ids of search results. * diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 1db4c612ee..f94681892a 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -336,52 +336,44 @@ public List browse( int start, int limit ) { @Override @Transactional(readOnly = true) public boolean checkHasBatchInfo( ExpressionExperiment ee ) { - if ( ee.getExperimentalDesign() == null ) { - return false; + if ( hasBatchFactor( ee ) ) { + return true; } - for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { - if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { - return true; - } - } + AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - AuditEvent ev1 = this.auditEventService.getLastEvent( ee, BatchInformationMissingEvent.class ); - AuditEvent ev2 = this.auditEventService.getLastEvent( ee, FailedBatchInformationMissingEvent.class ); - if ( ev1 != null || ev2 != null ) return false; + if ( lastBatchInfoEvent == null ) + return false; - AuditEvent ev = this.auditEventService.getLastEvent( ee, 
BatchInformationFetchingEvent.class ); - if ( ev == null ) return false; - return ev.getEventType().getClass().isAssignableFrom( BatchInformationFetchingEvent.class ) - || ev.getEventType().getClass().isAssignableFrom( SingleBatchDeterminationEvent.class ); // + // prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b, cases of missing batch information was incorrectly typed + // see https://github.com/PavlidisLab/Gemma/issues/1155 for details + if ( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent + && lastBatchInfoEvent.getNote().contains( "No header file for" ) ) { + return false; + } + + return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent; } @Override @Transactional(readOnly = true) public BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { - if ( ee.getExperimentalDesign() == null ) - return null; - - for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { - if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { - return new BatchInformationFetchingEvent(); // signal success - } - } - - AuditEvent ev3 = this.auditEventService.getLastEvent( ee, SingletonBatchInvalidEvent.class ); - if ( ev3 != null ) { - return ( SingletonBatchInvalidEvent ) ev3.getEventType(); + if ( hasBatchFactor( ee ) ) { + return new BatchInformationFetchingEvent(); } + AuditEvent ev = auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + return ev != null ? 
( BatchInformationEvent ) ev.getEventType() : null; + } - AuditEvent ev2 = this.auditEventService.getLastEvent( ee, BatchInformationMissingEvent.class ); - if ( ev2 != null ) { - return ( BatchInformationMissingEvent ) ev2.getEventType(); + private boolean hasBatchFactor( ExpressionExperiment ee ) { + if ( ee.getExperimentalDesign() != null ) { + for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { + if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { + return true; + } + } } - - AuditEvent ev = this.auditEventService.getLastEvent( ee, BatchInformationFetchingEvent.class ); - if ( ev == null ) return null; - return ( BatchInformationFetchingEvent ) ev.getEventType(); - + return false; } /** @@ -1555,7 +1547,7 @@ public void remove( Long id ) { @Transactional public void remove( ExpressionExperiment ee ) { ee = ensureInSession( ee ); - + if ( !securityService.isEditable( ee ) ) { throw new SecurityException( "Error performing 'ExpressionExperimentService.remove(ExpressionExperiment expressionExperiment)' --> " diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java index bd66e80e5f..ae429a7c0b 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java @@ -11,8 +11,13 @@ import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.analysis.preprocess.svd.SVDService; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchService; +import 
ubic.gemma.model.common.auditAndSecurity.AuditAction; +import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.*; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; @@ -23,15 +28,16 @@ import ubic.gemma.persistence.service.expression.bioAssayData.BioAssayDimensionService; import ubic.gemma.persistence.service.expression.bioAssayData.RawExpressionDataVectorDao; import ubic.gemma.persistence.service.expression.biomaterial.BioMaterialService; -import ubic.gemma.persistence.service.expression.experiment.*; import ubic.gemma.persistence.util.Filter; import ubic.gemma.persistence.util.Filters; -import ubic.gemma.core.context.TestComponent; import java.util.Collections; +import java.util.Date; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.*; /** @@ -159,9 +165,12 @@ public AccessDecisionManager accessDecisionManager() { @Autowired private OntologyService ontologyService; + @Autowired + private AuditEventService auditEventService; + @After public void tearDown() { - reset( ontologyService ); + reset( ontologyService, auditEventService ); } @Test @@ -198,4 +207,54 @@ public void testGetAnnotationsUsageFrequencyWithFilters() throws TimeoutExceptio verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } + + @Test + public void testBatchInfo() { + AuditEventType aet; + AuditEvent ae; + ExpressionExperiment ee; + + // no 
batch factor, no batch info attempt + ee = new ExpressionExperiment(); + assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new BatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new SingleBatchDeterminationEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new BatchInformationMissingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + + // batch info missing (after 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) + ee = new ExpressionExperiment(); + aet = new BatchInformationMissingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + + // batch info failed (prior to 
23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) + ee = new ExpressionExperiment(); + aet = new FailedBatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + + // has batch information, but it's got some issues + ee = new ExpressionExperiment(); + aet = new FailedBatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Invalid lane for sample GSM...", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); + } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 7a05aca3a1..078f7ed55a 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -49,9 +49,6 @@ import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResultValueObject; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; -import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationEvent; -import 
ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; import ubic.gemma.model.common.description.AnnotationValueObject; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.CharacteristicValueObject; @@ -133,8 +130,6 @@ public class DatasetsWebService { @Autowired private DifferentialExpressionAnalysisService differentialExpressionAnalysisService; @Autowired - private AuditEventService auditEventService; - @Autowired private QuantitationTypeArgService quantitationTypeArgService; @Autowired private OntologyService ontologyService; @@ -1024,10 +1019,7 @@ public ResponseDataObject getDatasetHasBatchInformation( // Params: @PathParam("dataset") DatasetArg datasetArg // Required ) { ExpressionExperiment ee = datasetArgService.getEntity( datasetArg ); - // BatchInformationEvent can either be BatchInformationFetchingEvent or BatchInformationMissingEvent, we - // consider the class of the latest one - AuditEvent event = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - return respond( event != null && event.getEventType() instanceof BatchInformationFetchingEvent ); + return respond( expressionExperimentService.checkHasBatchInfo( ee ) ); } /** From 1a6ae1dce0d6b8bc8bc504b88acfbe2f9950a744 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 21:50:43 -0700 Subject: [PATCH 44/81] Mark nullable fields in AuditEvent and add missing null-checks --- .../ArrayDesignSequenceManipulatingCli.java | 2 +- .../apps/ExternalDatabaseOverviewCli.java | 5 ++- .../BatchInfoPopulationServiceImpl.java | 4 +-- ...ExpressionExperimentReportServiceImpl.java | 36 +++++++++++-------- .../loader/expression/DataUpdaterImpl.java | 5 ++- .../common/auditAndSecurity/AuditEvent.java | 13 +++++-- .../auditAndSecurity/AuditEventDaoImpl.java | 4 +-- .../curation/AbstractCuratableDao.java | 3 +- .../ExpressionExperimentServiceImpl.java | 6 ++-- 
.../maintenance/TableMaintenanceUtilImpl.java | 2 +- .../ArrayDesignMergeServiceTest.java | 4 ++- .../geo/service/GeoDatasetServiceTest.java | 7 ++-- .../AuditTrailServiceImplTest.java | 5 ++- 13 files changed, 61 insertions(+), 35 deletions(-) diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignSequenceManipulatingCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignSequenceManipulatingCli.java index 7aceb1bafc..f7c9c6f848 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignSequenceManipulatingCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignSequenceManipulatingCli.java @@ -265,7 +265,7 @@ private boolean needToAutoRun( ArrayDesign arrayDesign, Class, Map> getLastE for ( Class ti : types ) { Map results2 = getLastEvents( auditables, ti, null ); results.put( ti, results2.entrySet().stream() - .filter( e -> ti.isAssignableFrom( e.getValue().getEventType().getClass() ) ) + .filter( e -> e.getValue().getEventType() != null && ti.isAssignableFrom( e.getValue().getEventType().getClass() ) ) .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ) ); } return results; diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java index aab6569b8e..8b4a94e742 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java @@ -59,8 +59,7 @@ public void updateCurationDetailsFromAuditEvent( Curatable curatable, AuditEvent curationDetails.setLastUpdated( auditEvent.getDate() ); // Update other curationDetails properties, if the event updates them. 
- if ( auditEvent.getEventType() != null - && CurationDetailsEvent.class.isAssignableFrom( auditEvent.getEventType().getClass() ) ) { + if ( auditEvent.getEventType() instanceof CurationDetailsEvent ) { CurationDetailsEvent eventType = ( CurationDetailsEvent ) auditEvent.getEventType(); eventType.updateCurationDetails( curationDetails, auditEvent ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index f94681892a..65ed4b7cd9 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -348,7 +348,7 @@ public boolean checkHasBatchInfo( ExpressionExperiment ee ) { // prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b, cases of missing batch information was incorrectly typed // see https://github.com/PavlidisLab/Gemma/issues/1155 for details if ( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent - && lastBatchInfoEvent.getNote().contains( "No header file for" ) ) { + && lastBatchInfoEvent.getNote() != null && lastBatchInfoEvent.getNote().contains( "No header file for" ) ) { return false; } @@ -1106,7 +1106,7 @@ private boolean checkIfSingleBatch( ExpressionExperiment ee ) { AuditEvent ev = this.auditEventService.getLastEvent( ee, BatchInformationFetchingEvent.class ); if ( ev == null ) return false; - if ( SingleBatchDeterminationEvent.class.isAssignableFrom( ev.getEventType().getClass() ) ) { + if ( ev.getEventType() instanceof SingleBatchDeterminationEvent ) { return true; } @@ -1650,7 +1650,7 @@ public boolean isBlackListed( String geoAccession ) { @Transactional(readOnly = true) public Boolean isSuitableForDEA( ExpressionExperiment ee ) { AuditEvent ev 
= auditEventService.getLastEvent( ee, DifferentialExpressionSuitabilityEvent.class ); - return ev == null || !UnsuitableForDifferentialExpressionAnalysisEvent.class.isAssignableFrom( ev.getEventType().getClass() ); + return ev == null || !( ev.getEventType() instanceof UnsuitableForDifferentialExpressionAnalysisEvent ); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java index ad059de499..cc2c694505 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/maintenance/TableMaintenanceUtilImpl.java @@ -200,7 +200,7 @@ public void updateGene2CsEntries() { if ( ae == null ) continue; // legacy of ordered-list which could end up with gaps; should // not be needed any more - if ( ae.getEventType() != null && ae.getEventType() instanceof ArrayDesignGeneMappingEvent + if ( ae.getEventType() instanceof ArrayDesignGeneMappingEvent && ae.getDate().after( status.getLastUpdate() ) ) { needToRefresh = true; annotation = a + " had probe mapping done since: " + status.getLastUpdate(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignMergeServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignMergeServiceTest.java index ad79ce79d1..7e2dc4b64e 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignMergeServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignMergeServiceTest.java @@ -17,7 +17,6 @@ import org.apache.commons.lang3.RandomStringUtils; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; - import ubic.gemma.core.util.test.BaseSpringContextTest; import 
ubic.gemma.model.common.auditAndSecurity.eventType.ArrayDesignMergeEvent; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; @@ -75,8 +74,11 @@ public void testMerge() { assertEquals( ad1ad2ad3, ad1.getMergedInto() ); assertEquals( ad1ad2ad3, ad2.getMergedInto() ); assertEquals( ad1ad2ad3, ad3.getMergedInto() ); + assertNotNull( ad1.getAuditTrail().getLast().getEventType() ); assertEquals( ArrayDesignMergeEvent.class, ad1.getAuditTrail().getLast().getEventType().getClass() ); + assertNotNull( ad2.getAuditTrail().getLast().getEventType() ); assertEquals( ArrayDesignMergeEvent.class, ad2.getAuditTrail().getLast().getEventType().getClass() ); + assertNotNull( ad3.getAuditTrail().getLast().getEventType() ); assertEquals( ArrayDesignMergeEvent.class, ad3.getAuditTrail().getLast().getEventType().getClass() ); /* diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/service/GeoDatasetServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/service/GeoDatasetServiceTest.java index 9f55f9fcc5..3d27fa873d 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/service/GeoDatasetServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/service/GeoDatasetServiceTest.java @@ -36,6 +36,7 @@ import ubic.gemma.core.util.test.category.GeoTest; import ubic.gemma.core.util.test.category.SlowTest; import ubic.gemma.model.common.auditAndSecurity.AuditAction; +import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.GeeqEvent; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.quantitationtype.QuantitationType; @@ -302,8 +303,10 @@ public void testFetchAndLoadGSE5949() throws Exception { // creation, followed by a GeeqEvent assertEquals( AuditAction.CREATE, ee.getAuditTrail().getEvents().get( 0 ).getAction() ); assertNull( ee.getAuditTrail().getEvents().get( 0 ).getEventType() ); - 
assertEquals( AuditAction.UPDATE, ee.getAuditTrail().getEvents().get( 1 ).getAction() ); - assertEquals( GeeqEvent.class, ee.getAuditTrail().getEvents().get( 1 ).getEventType().getClass() ); + AuditEvent ev2 = ee.getAuditTrail().getEvents().get( 1 ); + assertEquals( AuditAction.UPDATE, ev2.getAction() ); + assertNotNull( ev2.getEventType() ); + assertEquals( GeeqEvent.class, ev2.getEventType().getClass() ); } @Test diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailServiceImplTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailServiceImplTest.java index 470f0c160a..1653c7d473 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailServiceImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailServiceImplTest.java @@ -35,7 +35,6 @@ import ubic.gemma.model.common.auditAndSecurity.AuditTrail; import ubic.gemma.model.common.auditAndSecurity.eventType.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import java.util.Collection; @@ -136,6 +135,7 @@ public final void testAddUpdateEventAuditableAuditEventTypeString() { // FIXME: one of the two date makes a round-trip in the database and is of type Timestamp (which is a subclass of Date) assertEquals( ev.getDate().getTime(), auditable.getCurationDetails().getLastUpdated().getTime() ); assertEquals( size + 1, auditTrail.getEvents().size() ); + assertNotNull( ev.getEventType() ); assertEquals( AlignmentBasedGeneMappingEvent.class, ev.getEventType().getClass() ); } @@ -157,6 +157,7 @@ public final void testAddNeedsAttentionEvent() { AuditEvent ev = auditable.getAuditTrail().getLast(); assertNotNull( ev ); assertNotNull( ev.getId() ); + assertNotNull( 
ev.getEventType() ); assertEquals( NeedsAttentionEvent.class, ev.getEventType().getClass() ); auditable = arrayDesignService.load( auditable.getId() ); @@ -183,6 +184,7 @@ public final void testAddDoesNotNeedsAttentionEvent() { AuditEvent ev = auditable.getAuditTrail().getLast(); assertNotNull( ev ); assertNotNull( ev.getId() ); + assertNotNull( ev.getEventType() ); assertEquals( DoesNotNeedAttentionEvent.class, ev.getEventType().getClass() ); auditable = arrayDesignService.load( auditable.getId() ); @@ -254,6 +256,7 @@ public void testAddEventWhenTransactionIsRolledBack2() { AuditEvent e = auditable.getAuditTrail().getLast(); Assert.assertEquals( AuditAction.UPDATE, e.getAction() ); assertEquals( "test", e.getNote() ); + assertNotNull( e.getDetail() ); assertTrue( e.getDetail().contains( "RuntimeException" ) ); // ensure that the exception is logged assertEquals( size + 1, auditable.getAuditTrail().getEvents().size() ); From c9b62be925cbf7264871e4d3d08fc4e8ac106cae Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 19 Jun 2024 22:18:26 -0700 Subject: [PATCH 45/81] Add missing escapeHtml4 in GeoBrowserService --- .../geo/service/GeoBrowserServiceImpl.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/GeoBrowserServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/GeoBrowserServiceImpl.java index 907e5b92b8..6f6378b94c 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/GeoBrowserServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/service/GeoBrowserServiceImpl.java @@ -52,6 +52,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import static org.apache.commons.text.StringEscapeUtils.escapeHtml4; + /** * This is marked as {@link Lazy} since we don't use it outside Gemma Web, so it won't be loaded unless it's 
needed. * @author pavlidis @@ -289,17 +291,17 @@ private void formatArrayDetails( NodeList gpls, StringBuilder buf, String contex if ( arrayDesign.getCurationDetails().getTroubled() ) { AuditEvent lastTroubleEvent = arrayDesign.getCurationDetails().getLastTroubledEvent(); if ( lastTroubleEvent != null ) { - trouble = " \"troubled\""; + trouble = " \"troubled\"""; } } buf.append( "

Platform in Gemma: " ).append( gpl ).append( "" ) + .append( arrayDesign.getId() ).append( "\">" ).append( escapeHtml4( gpl ) ).append( "" ) .append( trouble ); } else { - buf.append( "

" ).append( gpl ).append( " [New to Gemma]" ); + buf.append( "

" ).append( escapeHtml4( gpl ) ).append( " [New to Gemma]" ); } } } From 36c6bd0bf088e8c72e63a3fa142f56ab04d0cad1 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 20 Jun 2024 10:34:20 -0700 Subject: [PATCH 46/81] Ensure that the dataset is in the session before checking its batch factor --- .../expression/experiment/ExpressionExperimentServiceImpl.java | 1 + 1 file changed, 1 insertion(+) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 65ed4b7cd9..73636c6147 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -366,6 +366,7 @@ public BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { } private boolean hasBatchFactor( ExpressionExperiment ee ) { + ee = ensureInSession( ee ); if ( ee.getExperimentalDesign() != null ) { for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { From f242971c155548ffcfd0b4d8f04f1dd2b14d8a82 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 20 Jun 2024 10:51:51 -0700 Subject: [PATCH 47/81] Use separate indicator for 'having batch info' and 'having usable batch info' --- ...xpressionExperimentDetailsValueObject.java | 3 +++ .../ExpressionExperimentService.java | 9 +++++++-- .../ExpressionExperimentServiceImpl.java | 20 +++++++++++++++++-- .../experiment/GeeqServiceImpl.java | 8 +++----- .../RNASeqBatchInfoPopulationTest.java | 6 +++--- .../ExpressionExperimentServiceTest.java | 7 +++++++ .../ExpressionExperimentController.java | 12 +++++------ 7 files changed, 46 insertions(+), 19 deletions(-) diff 
--git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentDetailsValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentDetailsValueObject.java index 4cbfa3f6a4..524496dc59 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentDetailsValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentDetailsValueObject.java @@ -54,6 +54,9 @@ public class ExpressionExperimentDetailsValueObject extends ExpressionExperiment private Date dateProcessedDataVectorComputation; private Collection differentialExpressionAnalyses = new HashSet<>(); private Collection expressionExperimentSets; + /** + * FIXME: rename this to hasUsableBatchInformation + */ private boolean hasBatchInformation; private Boolean hasBothIntensities = false; private Boolean hasCoexpressionAnalysis = false; diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 4953e833d6..40e57df5ff 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -155,11 +155,16 @@ public interface ExpressionExperimentService /** * Check if the given experiment has batch information. *

- * This does not imply that the batch information is usable or valid. Use {@link #checkBatchFetchStatus(ExpressionExperiment)} - * to get more details about the state of batch information. + * This does not imply that the batch information is usable or valid. Use {@link #checkHasUsableBatchInfo(ExpressionExperiment)} + * for that purpose. */ boolean checkHasBatchInfo( ExpressionExperiment ee ); + /** + * Check if the given experiment has usable batch information. + */ + boolean checkHasUsableBatchInfo( ExpressionExperiment ee ); + /** * Retrieve a batch information event that summarizes the state of batch information. */ diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 73636c6147..2c8f539a4e 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -355,6 +355,22 @@ public boolean checkHasBatchInfo( ExpressionExperiment ee ) { return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent; } + @Override + @Transactional(readOnly = true) + public boolean checkHasUsableBatchInfo( ExpressionExperiment ee ) { + if ( hasBatchFactor( ee ) ) { + return true; + } + + AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + + if ( lastBatchInfoEvent == null ) + return false; + + return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent + && !( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent ); + } + @Override @Transactional(readOnly = true) public BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { @@ -1049,8 +1065,8 @@ public Map 
getTaxaUsageFrequency( @Nullable Filters filters, @Nulla public String getBatchConfound( ExpressionExperiment ee ) { ee = this.thawBioAssays( ee ); - if ( !this.checkHasBatchInfo( ee ) ) { - log.info( "Experiment has no batch information, cannot check for confound: " + ee ); + if ( !this.checkHasUsableBatchInfo( ee ) ) { + log.info( "Experiment has no usable batch information, cannot check for confound: " + ee ); return null; } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java index 4bf971383a..d3d460dbc8 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java @@ -32,7 +32,6 @@ import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; import ubic.gemma.core.analysis.service.ExpressionDataMatrixService; -import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.model.common.auditAndSecurity.eventType.GeeqEvent; import ubic.gemma.model.common.description.BibliographicReference; @@ -499,11 +498,10 @@ private void scoreReplicates( ExpressionExperiment ee, Geeq gq ) { private boolean scoreBatchInfo( ExpressionExperiment ee, Geeq gq ) { double score; - boolean hasInfo = expressionExperimentService.checkHasBatchInfo( ee ); - - score = !hasInfo ? GeeqServiceImpl.N_10 : GeeqServiceImpl.P_10; + boolean hasUsableInfo = expressionExperimentService.checkHasUsableBatchInfo( ee ); + score = !hasUsableInfo ? 
GeeqServiceImpl.N_10 : GeeqServiceImpl.P_10; gq.setqScoreBatchInfo( score ); - return hasInfo; + return hasUsableInfo; } private void scoreBatchEffect( ExpressionExperiment ee, Geeq gq, boolean infoDetected, boolean confound ) { diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java index 2ede0e41a3..868f794b0e 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java @@ -26,6 +26,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.ClassPathResource; import ubic.basecode.util.FileTools; +import ubic.gemma.core.config.Settings; import ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest; import ubic.gemma.core.loader.expression.geo.GeoDomainObjectGeneratorLocal; import ubic.gemma.core.loader.expression.geo.service.GeoService; @@ -38,7 +39,6 @@ import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; -import ubic.gemma.core.config.Settings; import java.util.Collection; import java.util.Map; @@ -186,8 +186,8 @@ public void testGSE156689NoBatchinfo() throws Exception { Collection experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors(); assertTrue( experimentalFactors.isEmpty() ); assertTrue( auditService.hasEvent( ee, FailedBatchInformationFetchingEvent.class ) ); - assertFalse( this.eeService.checkHasBatchInfo( ee ) ); - + assertTrue( this.eeService.checkHasBatchInfo( ee ) ); + assertFalse( this.eeService.checkHasUsableBatchInfo( ee ) ); } @Test(expected = 
FASTQHeadersPresentButNotUsableException.class) diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java index ae429a7c0b..42a7d7f101 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java @@ -217,24 +217,28 @@ public void testBatchInfo() { // no batch factor, no batch info attempt ee = new ExpressionExperiment(); assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ) ee = new ExpressionExperiment(); aet = new BatchInformationFetchingEvent(); ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); + assertTrue( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); ee = new ExpressionExperiment(); aet = new SingleBatchDeterminationEvent(); ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); + assertTrue( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); ee = new ExpressionExperiment(); aet = new BatchInformationMissingEvent(); ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + assertFalse( 
expressionExperimentService.checkHasUsableBatchInfo( ee ) ); // batch info missing (after 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) ee = new ExpressionExperiment(); @@ -242,6 +246,7 @@ public void testBatchInfo() { ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); // batch info failed (prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) ee = new ExpressionExperiment(); @@ -249,6 +254,7 @@ public void testBatchInfo() { ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); + assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); // has batch information, but it's got some issues ee = new ExpressionExperiment(); @@ -256,5 +262,6 @@ public void testBatchInfo() { ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Invalid lane for sample GSM...", null, null, aet ); when( 
auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); + assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); } } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java index 76a4033d1f..6203d69491 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java @@ -41,16 +41,13 @@ import ubic.gemma.core.analysis.report.WhatsNew; import ubic.gemma.core.analysis.report.WhatsNewService; import ubic.gemma.core.analysis.service.ExpressionDataFileService; -import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; -import ubic.gemma.persistence.service.common.description.BibliographicReferenceService; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSearchService; +import ubic.gemma.core.job.AbstractTask; import ubic.gemma.core.job.TaskCommand; import ubic.gemma.core.job.TaskResult; import ubic.gemma.core.job.TaskRunningService; import ubic.gemma.core.loader.entrez.pubmed.PubMedSearch; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResultDisplayObject; -import ubic.gemma.core.job.AbstractTask; import ubic.gemma.core.tasks.analysis.expression.UpdateEEDetailsCommand; import ubic.gemma.core.tasks.analysis.expression.UpdatePubMedCommand; import ubic.gemma.model.common.auditAndSecurity.eventType.*; @@ -68,6 +65,7 @@ import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import 
ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; +import ubic.gemma.persistence.service.common.description.BibliographicReferenceService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssay.BioAssayService; import ubic.gemma.persistence.service.expression.biomaterial.BioMaterialService; @@ -1169,9 +1167,9 @@ private int numOutliersRemoved( ExpressionExperiment ee ) { * @param finalResult result */ private void setBatchInfo( ExpressionExperimentDetailsValueObject finalResult, ExpressionExperiment ee ) { - boolean hasBatchInformation = expressionExperimentService.checkHasBatchInfo( ee ); - finalResult.setHasBatchInformation( hasBatchInformation ); - if ( hasBatchInformation ) { + boolean hasUsableBatchInformation = expressionExperimentService.checkHasUsableBatchInfo( ee ); + finalResult.setHasBatchInformation( hasUsableBatchInformation ); + if ( hasUsableBatchInformation ) { finalResult.setBatchConfound( expressionExperimentService.getBatchConfound( ee ) ); } finalResult.setBatchEffect( expressionExperimentService.getBatchEffect( ee ).name() ); From a2428941a8cdd7b7c97eefdf9a689c274715d6ab Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 20 Jun 2024 11:31:53 -0700 Subject: [PATCH 48/81] Refactor batch-related operations in a separate service --- .../links/LinkAnalysisServiceImpl.java | 6 +- .../BatchInfoPopulationServiceImpl.java | 7 +- ...nExperimentBatchCorrectionServiceImpl.java | 14 +- ...sionExperimentBatchInformationService.java | 55 ++++ ...ExperimentBatchInformationServiceImpl.java | 290 ++++++++++++++++++ ...ExpressionExperimentReportServiceImpl.java | 15 +- .../ExpressionDataFileServiceImpl.java | 9 +- .../ExpressionExperimentService.java | 50 --- .../ExpressionExperimentServiceImpl.java | 259 ---------------- .../experiment/GeeqServiceImpl.java | 12 +- .../RNASeqBatchInfoPopulationTest.java | 11 +- 
.../ExpressionDataFileServiceTest.java | 12 +- ...ExperimentBatchInformationServiceTest.java | 127 ++++++++ .../ExpressionExperimentServiceTest.java | 69 +---- .../ubic/gemma/rest/DatasetsWebService.java | 15 +- .../gemma/rest/DatasetsWebServiceTest.java | 6 + .../ExpressionExperimentController.java | 17 +- 17 files changed, 551 insertions(+), 423 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java create mode 100644 gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java create mode 100644 gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java index ac6d01ec82..49e0694a2e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java @@ -59,6 +59,7 @@ import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import java.io.IOException; @@ -102,6 +103,9 @@ public class LinkAnalysisServiceImpl implements LinkAnalysisService { @Autowired private ProcessedExpressionDataVectorService processedExpressionDataVectorService; + 
@Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; + @Override public LinkAnalysis process( ExpressionExperiment ee, FilterConfig filterConfig, LinkAnalysisConfig linkAnalysisConfig ) { @@ -441,7 +445,7 @@ private void qcCheck( LinkAnalysisConfig config, ExpressionExperiment ee ) throw } if ( config.isCheckForBatchEffect() ) { - BatchEffectDetails batchEffect = eeService.getBatchEffectDetails( ee ); + BatchEffectDetails batchEffect = expressionExperimentBatchInformationService.getBatchEffectDetails( ee ); if ( batchEffect.getDataWasBatchCorrected() ) { LinkAnalysisServiceImpl.log.info( "Data are batch-corrected" ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java index 7460a26425..46a378df68 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java @@ -91,19 +91,18 @@ public static boolean isBatchFactor( ExperimentalFactor ef ) { @Autowired private AuditEventService auditEventService; - @Autowired private AuditTrailService auditTrailService; @Autowired private BatchInfoPopulationHelperService batchInfoPopulationHelperService = null; @Autowired private BioAssayService bioAssayService; - @Autowired private ExperimentalFactorService experimentalFactorService = null; - @Autowired private ExpressionExperimentService expressionExperimentService = null; + @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; @Override @Transactional @@ -342,7 +341,7 @@ private File locateFASTQheadersForBatchInfo( String accession ) { private boolean needToRun( ExpressionExperiment ee, boolean rnaSeq ) { if ( rnaSeq 
) { - return !expressionExperimentService.checkHasBatchInfo( ee ); + return !expressionExperimentBatchInformationService.checkHasBatchInfo( ee ); } if ( ee.getAccession() == null || StringUtils.isBlank( ee.getAccession().getAccession() ) ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java index a4d64a7db4..304ab1bb1c 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java @@ -26,7 +26,6 @@ import ubic.basecode.math.MatrixStats; import ubic.basecode.util.FileTools; import ubic.gemma.core.analysis.expression.diff.LinearModelAnalyzer; -import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.quantitationtype.QuantitationType; @@ -35,10 +34,7 @@ import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.designElement.CompositeSequence; -import ubic.gemma.model.expression.experiment.BatchEffectType; -import ubic.gemma.model.expression.experiment.ExperimentalFactor; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.model.expression.experiment.FactorValue; +import ubic.gemma.model.expression.experiment.*; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; @@ -60,9 +56,13 @@ public class 
ExpressionExperimentBatchCorrectionServiceImpl implements Expressio public static final String COLLECTION_OF_MATERIAL_URI = "http://www.ebi.ac.uk/efo/EFO_0005066"; public static final String DE_EXCLUDE_URI = "http://gemma.msl.ubc.ca/ont/TGEMO_00014"; public static final String DE_INCLUDE_URI = "http://gemma.msl.ubc.ca/ont/TGEMO_00013"; + @Autowired private ExpressionExperimentService expressionExperimentService; + @Autowired + private ExpressionExperimentBatchInformationService eeBatchService; + @Autowired private ProcessedExpressionDataVectorService processedExpressionDataVectorService; @@ -75,7 +75,7 @@ public boolean checkCorrectability( ExpressionExperiment ee ) { return false; } - BatchEffectType bet = expressionExperimentService.getBatchEffect( ee ); + BatchEffectType bet = eeBatchService.getBatchEffect( ee ); if ( BatchEffectType.NO_BATCH_EFFECT_SUCCESS.equals( bet ) || BatchEffectType.SINGLE_BATCH_SUCCESS.equals( bet ) ) { ExpressionExperimentBatchCorrectionServiceImpl.log.info( "Experiment does not require batch correction as " + "batch effect is negligible or it's a single batch: " + ee ); @@ -87,7 +87,7 @@ public boolean checkCorrectability( ExpressionExperiment ee ) { return false; } - String bConf = expressionExperimentService.getBatchConfound( ee ); + String bConf = eeBatchService.getBatchConfound( ee ); if ( bConf != null ) { // we used to let force override this, but that behavior is undesirable: if there is a confound, we don't batch correct ExpressionExperimentBatchCorrectionServiceImpl.log .info( "Experiment cannot be batch corrected due to a confound: " + bConf ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java new file mode 100644 index 0000000000..9360674f78 --- /dev/null +++ 
b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java @@ -0,0 +1,55 @@ +package ubic.gemma.core.analysis.preprocess.batcheffects; + +import ubic.gemma.model.expression.experiment.BatchEffectType; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; + +import javax.annotation.Nullable; + +/** + * Provides status of batch information for datasets. + */ +public interface ExpressionExperimentBatchInformationService { + + /** + * Check if the given experiment has batch information. + *

+ * This does not imply that the batch information is usable or valid. Use {@link #checkHasUsableBatchInfo(ExpressionExperiment)} + * for that purpose. + */ + boolean checkHasBatchInfo( ExpressionExperiment ee ); + + /** + * Check if the given experiment has usable batch information. + */ + boolean checkHasUsableBatchInfo( ExpressionExperiment ee ); + + /** + * Checks the experiment for a batch confound. + * + * @param ee the experiment to check. + * @return a string describing the batch confound, or null if there was no batch confound.[FIXME: String return value is unsafe] + */ + @Nullable + String getBatchConfound( ExpressionExperiment ee ); + + /** + * Obtain the full batch effect details of a given experiment. + * @param ee experiment + * @return details for the principal component most associated with batches (even if it isn't "significant"). Note + * that we don't look at every component, just the first few. + */ + BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ); + + /** + * Obtain a {@link BatchEffectType} describing the batch effect state of the given experiment. + * @param ee the experiment to get the batch effect for. + */ + BatchEffectType getBatchEffect( ExpressionExperiment ee ); + + /** + * Obtain a string describing the summary statistics of a batch effect, if one is present in the given experiment.
+ * @return summary statistics or null if there is no batch effect + */ + @Nullable + String getBatchEffectStatistics( ExpressionExperiment ee ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java new file mode 100644 index 0000000000..a565300166 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java @@ -0,0 +1,290 @@ +package ubic.gemma.core.analysis.preprocess.batcheffects; + +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import ubic.gemma.core.analysis.preprocess.svd.SVDService; +import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; +import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.FailedBatchInformationFetchingEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.SingleBatchDeterminationEvent; +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.experiment.BatchEffectType; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet; +import 
ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; + +import java.util.*; + +@Service +@CommonsLog +public class ExpressionExperimentBatchInformationServiceImpl implements ExpressionExperimentBatchInformationService { + + private static final double BATCH_CONFOUND_THRESHOLD = 0.01; + private static final double BATCH_EFFECT_THRESHOLD = 0.01; + + @Autowired + private ExpressionExperimentService expressionExperimentService; + @Autowired + private SVDService svdService; + @Autowired + private AuditEventService auditEventService; + + @Override + @Transactional(readOnly = true) + public boolean checkHasBatchInfo( ExpressionExperiment ee ) { + if ( hasBatchFactor( ee ) ) { + return true; + } + + AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + + if ( lastBatchInfoEvent == null ) + return false; + + // prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b, cases of missing batch information was incorrectly typed + // see https://github.com/PavlidisLab/Gemma/issues/1155 for details + if ( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent + && lastBatchInfoEvent.getNote() != null && lastBatchInfoEvent.getNote().contains( "No header file for" ) ) { + return false; + } + + return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent; + } + + @Override + @Transactional(readOnly = true) + public boolean checkHasUsableBatchInfo( ExpressionExperiment ee ) { + if ( hasBatchFactor( ee ) ) { + return true; + } + + AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + + if ( lastBatchInfoEvent == null ) + return false; + + return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent + && !( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent ); + } + + @Override + 
@Transactional(readOnly = true) + public String getBatchConfound( ExpressionExperiment ee ) { + ee = expressionExperimentService.thawBioAssays( ee ); + + if ( !this.checkHasUsableBatchInfo( ee ) ) { + log.info( "Experiment has no usable batch information, cannot check for confound: " + ee ); + return null; + } + + Collection confounds; + try { + confounds = BatchConfoundUtils.test( ee ); + } catch ( NotStrictlyPositiveException e ) { + log.error( String.format( "Batch confound test for %s threw a NonStrictlyPositiveException! Returning null.", ee ), e ); + return null; + } + + StringBuilder result = new StringBuilder(); + // Confounds have to be sorted in order to always get the same string + List listConfounds = new ArrayList<>( confounds ); + listConfounds.sort( Comparator.comparing( BatchConfound::toString ) ); + + for ( BatchConfound c : listConfounds ) { + if ( c.getP() < BATCH_CONFOUND_THRESHOLD ) { + String factorName = c.getEf().getName(); + if ( result.toString().isEmpty() ) { + result.append( + "One or more factors were confounded with batches in the full design; batch correction was not performed. " + + "Analyses may not be affected if performed on non-confounded subsets. Factor(s) confounded were: " ); + } else { + result.append( ", " ); + } + result.append( factorName ); + } + } + + // Now check subsets, if relevant. + if ( !listConfounds.isEmpty() && gemma.gsec.util.SecurityUtil.isUserAdmin() ) { + Collection subSets = expressionExperimentService.getSubSets( ee ); + if ( !subSets.isEmpty() ) { + for ( ExpressionExperimentSubSet subset : subSets ) { + try { + confounds = BatchConfoundUtils.test( subset ); + for ( BatchConfound c : confounds ) { + if ( c.getP() < BATCH_CONFOUND_THRESHOLD ) { + result.append( "

Confound still exists for " + c.getEf().getName() + " in " + subset ); + } + } + } catch ( NotStrictlyPositiveException e ) { + + } + } + } + } + + return StringUtils.stripToNull( result.toString() ); + } + + @Override + @Transactional(readOnly = true) + public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) { + ee = expressionExperimentService.thawLiter( ee ); + + BatchEffectDetails details = new BatchEffectDetails( this.checkBatchFetchStatus( ee ), + this.hasBeenBatchCorrected( ee ), this.checkIfSingleBatch( ee ) ); + + // if missing or failed, we can't compute a P-value + if ( !details.hasBatchInformation() || details.hasProblematicBatchInformation() ) { + return details; + } + + // we can't compute a P-value for a single batch + if ( details.isSingleBatch() ) { + return details; + } + + for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { + if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { + SVDValueObject svd = svdService.getSvdFactorAnalysis( ee.getId() ); + if ( svd == null ) { + log.warn( "SVD was null for " + ef + ", can't compute batch effect statistics." ); + break; + } + + // Use the "date run" information as a first pass to decide if there is a batch association. + // This won't always be present. + double minP = 1.0; + if ( svd.getDatePvals() != null ) { + for ( Integer component : svd.getDatePvals().keySet() ) { + Double pVal = svd.getDatePvals().get( component ); + if ( pVal != null && pVal < minP ) { + details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); + minP = pVal; + } + } + } + + // we can override the date-based p-value with the factor-based p-value if it is lower. + // The reason to do this is it can be underpowered. The date-based one is more sensitive. 
+ for ( Integer component : svd.getFactorPvals().keySet() ) { + Map cmpEffects = svd.getFactorPvals().get( component ); + + // could use the effect size instead of the p-values (or in addition) + //Map cmpEffectSizes = svd.getFactorCorrelations().get( component ); + + Double pVal = cmpEffects.get( ef.getId() ); + if ( pVal != null && pVal < minP ) { + details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); + minP = pVal; + } + + } + return details; + } + } + + log.warn( String.format( "No suitable batch factor was found for %s to obtain batch effect statistics.", ee ) ); + + return details; + } + + @Override + @Transactional(readOnly = true) + public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { + BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); + BatchEffectDetails.BatchEffectStatistics batchEffectStatistics = beDetails.getBatchEffectStatistics(); + + if ( beDetails.getHasSingletonBatches() ) { + return BatchEffectType.SINGLETON_BATCHES_FAILURE; + } else if ( beDetails.getHasUninformativeBatchInformation() ) { + return BatchEffectType.UNINFORMATIVE_HEADERS_FAILURE; + } else if ( !beDetails.hasBatchInformation() ) { + return BatchEffectType.NO_BATCH_INFO; + } else if ( beDetails.hasProblematicBatchInformation() ) { + return BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE; + } else if ( beDetails.isSingleBatch() ) { + return BatchEffectType.SINGLE_BATCH_SUCCESS; + } else if ( beDetails.getDataWasBatchCorrected() ) { + // Checked for in ExpressionExperimentDetails.js::renderStatus() + return BatchEffectType.BATCH_CORRECTED_SUCCESS; + } else { + if ( batchEffectStatistics == null ) { + return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; + } else if ( batchEffectStatistics.getPvalue() < BATCH_EFFECT_THRESHOLD ) { + // this means there was a batch effect but we couldn't correct it + return BatchEffectType.BATCH_EFFECT_FAILURE; + } else { + return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; + } + } + } + + 
@Override + @Transactional(readOnly = true) + public String getBatchEffectStatistics( ExpressionExperiment ee ) { + BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); + if ( beDetails.getBatchEffectStatistics() != null ) { + return String.format( "This data set may have a batch artifact (PC %d), p=%.5g", + beDetails.getBatchEffectStatistics().getComponent(), + beDetails.getBatchEffectStatistics().getPvalue() ); + } + return null; + } + + private boolean checkIfSingleBatch( ExpressionExperiment ee ) { + AuditEvent ev = this.auditEventService.getLastEvent( ee, BatchInformationFetchingEvent.class ); + if ( ev == null ) return false; + + if ( ev.getEventType() instanceof SingleBatchDeterminationEvent ) { + return true; + } + + // address cases that were run prior to having the SingleBatchDeterminationEvent type. + if ( ev.getNote() != null && ( ev.getNote().startsWith( "1 batch" ) || ev.getNote().startsWith( "AffyScanDateExtractor; 0 batches" ) ) ) { + return true; + } + + return false; + } + + /** + * Retrieve a batch information event that summarizes the state of batch information. + */ + private BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { + if ( hasBatchFactor( ee ) ) { + return new BatchInformationFetchingEvent(); + } + AuditEvent ev = auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + return ev != null ? 
( BatchInformationEvent ) ev.getEventType() : null; + } + + private boolean hasBatchFactor( ExpressionExperiment ee ) { + ee = expressionExperimentService.thawLiter( ee ); + if ( ee.getExperimentalDesign() != null ) { + for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { + if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { + return true; + } + } + } + return false; + } + + private boolean hasBeenBatchCorrected( ExpressionExperiment ee ) { + for ( QuantitationType qt : ee.getQuantitationTypes() ) { + if ( qt.getIsBatchCorrected() ) { + return true; + } + } + return false; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java index f0bd74e9ac..fb547422b1 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java @@ -32,8 +32,8 @@ import org.springframework.transaction.annotation.Transactional; import ubic.gemma.core.visualization.ExperimentalDesignVisualizationService; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; -import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.eventType.*; import ubic.gemma.model.expression.experiment.BatchEffectType; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -42,7 +42,8 @@ import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; 
-import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationServiceImpl; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.EntityUtils; @@ -88,7 +89,7 @@ public class ExpressionExperimentReportServiceImpl implements ExpressionExperime @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired - private ProcessedExpressionDataVectorService processedExpressionDataVectorService; + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; @Autowired private BeanFactory beanFactory; @@ -101,6 +102,8 @@ public class ExpressionExperimentReportServiceImpl implements ExpressionExperime * Cache to hold stats in memory. This is used to avoid hittinig the disk for reports too often. */ private Cache statsCache; + @Autowired + private ExpressionExperimentBatchInformationServiceImpl expressionExperimentBatchInformationServiceImpl; @Override public void afterPropertiesSet() { @@ -421,10 +424,10 @@ public void recalculateBatchInfo() { @Transactional public void recalculateExperimentBatchInfo( ExpressionExperiment ee ) { ee = expressionExperimentService.thaw( ee ); - BatchEffectType effect = expressionExperimentService.getBatchEffect( ee ); - String effectStatistics = expressionExperimentService.getBatchEffectStatistics( ee ); + BatchEffectType effect = expressionExperimentBatchInformationService.getBatchEffect( ee ); + String effectStatistics = expressionExperimentBatchInformationServiceImpl.getBatchEffectStatistics( ee ); String effectSummary = effectStatistics != null ? 
effectStatistics : effect.name(); - String confound = expressionExperimentService.getBatchConfound( ee ); + String confound = expressionExperimentBatchInformationService.getBatchConfound( ee ); String confoundSummary = confound != null ? confound : ""; if ( !Objects.equals( confound, ee.getBatchConfound() ) ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java index 9355a25d01..691175f174 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java @@ -32,11 +32,11 @@ import ubic.gemma.core.analysis.preprocess.ExpressionDataMatrixBuilder; import ubic.gemma.core.analysis.preprocess.filter.FilterConfig; import ubic.gemma.core.analysis.preprocess.filter.FilteringException; +import ubic.gemma.core.config.Settings; import ubic.gemma.core.datastructure.matrix.ExperimentalDesignWriter; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrix; import ubic.gemma.core.datastructure.matrix.MatrixWriter; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; import ubic.gemma.model.analysis.expression.diff.ContrastResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; @@ -53,10 +53,11 @@ import ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.RawAndProcessedExpressionDataVectorService; +import 
ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.DifferentialExpressionAnalysisResultComparator; import ubic.gemma.persistence.util.EntityUtils; -import ubic.gemma.core.config.Settings; import javax.annotation.Nullable; import javax.annotation.ParametersAreNonnullByDefault; @@ -104,6 +105,8 @@ private static ExpressionExperiment experimentForBioAssaySet( BioAssaySet bas ) @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; + @Autowired private CoexpressionService gene2geneCoexpressionService = null; @Autowired private RawAndProcessedExpressionDataVectorService rawAndProcessedExpressionDataVectorService; @@ -1106,7 +1109,7 @@ private String makeDiffExpressionResultSetFileHeader( ExpressionAnalysisResultSe } } - String batchConf = expressionExperimentService.getBatchConfound( ee ); + String batchConf = expressionExperimentBatchInformationService.getBatchConfound( ee ); if ( batchConf != null ) { buf.append( "# !!! 
Warning, this dataset has a batch confound with the factors analysed\n" ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 40e57df5ff..ee4ea37e9b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -24,7 +24,6 @@ import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; import ubic.gemma.core.search.SearchException; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationEvent; import ubic.gemma.model.common.description.AnnotationValueObject; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.Characteristic; @@ -152,25 +151,6 @@ public interface ExpressionExperimentService @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_COLLECTION_READ" }) List browse( int start, int limit ); - /** - * Check if the given experiment has batch information. - *

- * This does not imply that the batch information is usable or valid. Use {@link #checkHasUsableBatchInfo(ExpressionExperiment)} - * for that purpose. - */ - boolean checkHasBatchInfo( ExpressionExperiment ee ); - - /** - * Check if the given experiment has usable batch information. - */ - boolean checkHasUsableBatchInfo( ExpressionExperiment ee ); - - /** - * Retrieve a batch information event that summarizes the state of batch information. - */ - @Nullable - BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ); - /** * returns ids of search results. * @@ -456,36 +436,6 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { */ Map getTaxaUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ); - /** - * Checks the experiment for a batch confound. - * - * @param ee the experiment to check. - * @return a string describing the batch confound, or null if there was no batch confound.[FIXME: String return value is unsafe] - */ - @Nullable - String getBatchConfound( ExpressionExperiment ee ); - - /** - * Obtain the full batch effect details of a given experiment. - * @param ee experiment - * @return details for the principal component most associated with batches (even if it isn't "significant"). Note - * that we don't look at every component, just the first few. - */ - BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ); - - /** - * Obtain a {@link BatchEffectType} describing the batch effect state of the given experiment. - * @param ee the experiment to get the batch effect for. - */ - BatchEffectType getBatchEffect( ExpressionExperiment ee ); - - /** - * Obtain a string describing the summary statistics of a batch effect is present in the given experiment. - * @return summary statistics or null if there is no batch effect - */ - @Nullable - String getBatchEffectStatistics( ExpressionExperiment ee ); - /** * @param expressionExperiment experiment * @return the BioAssayDimensions for the study. 
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 2c8f539a4e..c1ce5fc051 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -23,7 +23,6 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.commons.math3.exception.NotStrictlyPositiveException; import org.hibernate.CacheMode; import org.hibernate.Hibernate; import org.springframework.beans.factory.annotation.Autowired; @@ -34,12 +33,7 @@ import org.springframework.util.Assert; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.model.OntologyTermSimple; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchConfound; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchConfoundUtils; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationServiceImpl; import ubic.gemma.core.analysis.preprocess.svd.SVDService; -import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; @@ -101,9 +95,6 @@ public class ExpressionExperimentServiceImpl extends AbstractFilteringVoEnabledService implements ExpressionExperimentService { - private static final double BATCH_CONFOUND_THRESHOLD = 0.01; - private static final double BATCH_EFFECT_THRESHOLD = 0.01; - private final ExpressionExperimentDao expressionExperimentDao; @Autowired @@ -333,66 +324,6 @@ public 
List browse( int start, int limit ) { return this.expressionExperimentDao.browse( start, limit ); } - @Override - @Transactional(readOnly = true) - public boolean checkHasBatchInfo( ExpressionExperiment ee ) { - if ( hasBatchFactor( ee ) ) { - return true; - } - - AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - - if ( lastBatchInfoEvent == null ) - return false; - - // prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b, cases of missing batch information was incorrectly typed - // see https://github.com/PavlidisLab/Gemma/issues/1155 for details - if ( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent - && lastBatchInfoEvent.getNote() != null && lastBatchInfoEvent.getNote().contains( "No header file for" ) ) { - return false; - } - - return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent; - } - - @Override - @Transactional(readOnly = true) - public boolean checkHasUsableBatchInfo( ExpressionExperiment ee ) { - if ( hasBatchFactor( ee ) ) { - return true; - } - - AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - - if ( lastBatchInfoEvent == null ) - return false; - - return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent - && !( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent ); - } - - @Override - @Transactional(readOnly = true) - public BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { - if ( hasBatchFactor( ee ) ) { - return new BatchInformationFetchingEvent(); - } - AuditEvent ev = auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - return ev != null ? 
( BatchInformationEvent ) ev.getEventType() : null; - } - - private boolean hasBatchFactor( ExpressionExperiment ee ) { - ee = ensureInSession( ee ); - if ( ee.getExperimentalDesign() != null ) { - for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { - if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { - return true; - } - } - } - return false; - } - /** * returns ids of search results * @@ -1060,187 +991,6 @@ public Map getTaxaUsageFrequency( @Nullable Filters filters, @Nulla } } - @Override - @Transactional(readOnly = true) - public String getBatchConfound( ExpressionExperiment ee ) { - ee = this.thawBioAssays( ee ); - - if ( !this.checkHasUsableBatchInfo( ee ) ) { - log.info( "Experiment has no usable batch information, cannot check for confound: " + ee ); - return null; - } - - Collection confounds; - try { - confounds = BatchConfoundUtils.test( ee ); - } catch ( NotStrictlyPositiveException e ) { - AbstractService.log.error( String.format( "Batch confound test for %s threw a NonStrictlyPositiveException! Returning null.", ee ), e ); - return null; - } - - StringBuilder result = new StringBuilder(); - // Confounds have to be sorted in order to always get the same string - List listConfounds = new ArrayList<>( confounds ); - listConfounds.sort( Comparator.comparing( BatchConfound::toString ) ); - - for ( BatchConfound c : listConfounds ) { - if ( c.getP() < ExpressionExperimentServiceImpl.BATCH_CONFOUND_THRESHOLD ) { - String factorName = c.getEf().getName(); - if ( result.toString().isEmpty() ) { - result.append( - "One or more factors were confounded with batches in the full design; batch correction was not performed. " - + "Analyses may not be affected if performed on non-confounded subsets. Factor(s) confounded were: " ); - } else { - result.append( ", " ); - } - result.append( factorName ); - } - } - - // Now check subsets, if relevant. 
- if ( !listConfounds.isEmpty() && gemma.gsec.util.SecurityUtil.isUserAdmin() ) { - Collection subSets = this.getSubSets( ee ); - if ( !subSets.isEmpty() ) { - for ( ExpressionExperimentSubSet subset : subSets ) { - try { - confounds = BatchConfoundUtils.test( subset ); - for ( BatchConfound c : confounds ) { - if ( c.getP() < ExpressionExperimentServiceImpl.BATCH_CONFOUND_THRESHOLD ) { - result.append( "

Confound still exists for " + c.getEf().getName() + " in " + subset ); - } - } - } catch ( NotStrictlyPositiveException e ) { - - } - } - } - } - - return StringUtils.stripToNull( result.toString() ); - } - - private boolean checkIfSingleBatch( ExpressionExperiment ee ) { - AuditEvent ev = this.auditEventService.getLastEvent( ee, BatchInformationFetchingEvent.class ); - if ( ev == null ) return false; - - if ( ev.getEventType() instanceof SingleBatchDeterminationEvent ) { - return true; - } - - // address cases that were run prior to having the SingleBatchDeterminationEvent type. - if ( ev.getNote() != null && ( ev.getNote().startsWith( "1 batch" ) || ev.getNote().startsWith( "AffyScanDateExtractor; 0 batches" ) ) ) { - return true; - } - - return false; - } - - @Override - @Transactional(readOnly = true) - public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) { - ee = this.thawLiter( ee ); - - BatchEffectDetails details = new BatchEffectDetails( this.checkBatchFetchStatus( ee ), - this.getHasBeenBatchCorrected( ee ), this.checkIfSingleBatch( ee ) ); - - // if missing or failed, we can't compute a P-value - if ( !details.hasBatchInformation() || details.hasProblematicBatchInformation() ) { - return details; - } - - // we can't compute a P-value for a single batch - if ( details.isSingleBatch() ) { - return details; - } - - for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { - if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { - SVDValueObject svd = svdService.getSvdFactorAnalysis( ee.getId() ); - if ( svd == null ) { - log.warn( "SVD was null for " + ef + ", can't compute batch effect statistics." ); - break; - } - - // Use the "date run" information as a first pass to decide if there is a batch association. - // This won't always be present. 
- double minP = 1.0; - if ( svd.getDatePvals() != null ) { - for ( Integer component : svd.getDatePvals().keySet() ) { - Double pVal = svd.getDatePvals().get( component ); - if ( pVal != null && pVal < minP ) { - details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); - minP = pVal; - } - } - } - - // we can override the date-based p-value with the factor-based p-value if it is lower. - // The reason to do this is it can be underpowered. The date-based one is more sensitive. - for ( Integer component : svd.getFactorPvals().keySet() ) { - Map cmpEffects = svd.getFactorPvals().get( component ); - - // could use the effect size instead of the p-values (or in addition) - //Map cmpEffectSizes = svd.getFactorCorrelations().get( component ); - - Double pVal = cmpEffects.get( ef.getId() ); - if ( pVal != null && pVal < minP ) { - details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); - minP = pVal; - } - - } - return details; - } - } - - log.warn( String.format( "No suitable batch factor was found for %s to obtain batch effect statistics.", ee ) ); - - return details; - } - - @Override - @Transactional(readOnly = true) - public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { - BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); - BatchEffectDetails.BatchEffectStatistics batchEffectStatistics = beDetails.getBatchEffectStatistics(); - - if ( beDetails.getHasSingletonBatches() ) { - return BatchEffectType.SINGLETON_BATCHES_FAILURE; - } else if ( beDetails.getHasUninformativeBatchInformation() ) { - return BatchEffectType.UNINFORMATIVE_HEADERS_FAILURE; - } else if ( !beDetails.hasBatchInformation() ) { - return BatchEffectType.NO_BATCH_INFO; - } else if ( beDetails.hasProblematicBatchInformation() ) { - return BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE; - } else if ( beDetails.isSingleBatch() ) { - return BatchEffectType.SINGLE_BATCH_SUCCESS; - } else if ( 
beDetails.getDataWasBatchCorrected() ) { - // Checked for in ExpressionExperimentDetails.js::renderStatus() - return BatchEffectType.BATCH_CORRECTED_SUCCESS; - } else { - if ( batchEffectStatistics == null ) { - return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; - } else if ( batchEffectStatistics.getPvalue() < ExpressionExperimentServiceImpl.BATCH_EFFECT_THRESHOLD ) { - // this means there was a batch effect but we couldn't correct it - return BatchEffectType.BATCH_EFFECT_FAILURE; - } else { - return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; - } - } - } - - @Nullable - @Override - @Transactional(readOnly = true) - public String getBatchEffectStatistics( ExpressionExperiment ee ) { - BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); - if ( beDetails.getBatchEffectStatistics() != null ) { - return String.format( "This data set may have a batch artifact (PC %d), p=%.5g", - beDetails.getBatchEffectStatistics().getComponent(), - beDetails.getBatchEffectStatistics().getPvalue() ); - } - return null; - } @Override @Transactional(readOnly = true) @@ -1624,15 +1374,6 @@ private Collection getAnnotationsByBioMaterials } - private boolean getHasBeenBatchCorrected( ExpressionExperiment ee ) { - for ( QuantitationType qt : ee.getQuantitationTypes() ) { - if ( qt.getIsBatchCorrected() ) { - return true; - } - } - return false; - } - /** * @param ees experiments * @param type event type diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java index d3d460dbc8..d59b6bed74 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java @@ -31,6 +31,7 @@ import ubic.basecode.math.DescriptiveWithMissing; import 
ubic.gemma.core.analysis.preprocess.OutlierDetectionService; import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.core.analysis.service.ExpressionDataMatrixService; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.model.common.auditAndSecurity.eventType.GeeqEvent; @@ -94,7 +95,9 @@ public class GeeqServiceImpl extends AbstractVoEnabledService experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors(); assertTrue( experimentalFactors.isEmpty() ); assertTrue( auditService.hasEvent( ee, FailedBatchInformationFetchingEvent.class ) ); - assertTrue( this.eeService.checkHasBatchInfo( ee ) ); - assertFalse( this.eeService.checkHasUsableBatchInfo( ee ) ); + assertTrue( this.eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( this.eeBatchService.checkHasUsableBatchInfo( ee ) ); } @Test(expected = FASTQHeadersPresentButNotUsableException.class) diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java index b7a724e96b..0143772ad6 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java @@ -10,15 +10,16 @@ import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; +import ubic.gemma.core.config.Settings; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import 
ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.association.coexpression.CoexpressionService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.RawAndProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; -import ubic.gemma.core.config.Settings; -import ubic.gemma.core.context.TestComponent; import java.io.File; import java.io.IOException; @@ -60,6 +61,11 @@ public ExpressionExperimentService expressionExperimentService() { return mock( ExpressionExperimentService.class ); } + @Bean + public ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService() { + return mock(); + } + @Bean public CoexpressionService gene2geneCoexpressionService() { return mock( CoexpressionService.class ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java new file mode 100644 index 0000000000..604d8ea077 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java @@ -0,0 +1,127 @@ +package ubic.gemma.persistence.service.expression.experiment; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import 
org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationServiceImpl; +import ubic.gemma.core.analysis.preprocess.svd.SVDService; +import ubic.gemma.core.context.TestComponent; +import ubic.gemma.model.common.auditAndSecurity.AuditAction; +import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.*; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; + +import java.util.Date; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +@ContextConfiguration +public class ExpressionExperimentBatchInformationServiceTest extends AbstractJUnit4SpringContextTests { + + @Configuration + @TestComponent + static class ExpressionExperimentBatchInformationServiceTestContextConfiguration { + + @Bean + public ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService() { + return new ExpressionExperimentBatchInformationServiceImpl(); + } + + @Bean + public ExpressionExperimentService expressionExperimentService() { + return mock(); + } + + @Bean + public SVDService svdService() { + return mock(); + } + + @Bean + public AuditEventService auditEventService() { + return mock(); + } + + } + + @Autowired + private ExpressionExperimentBatchInformationService eeBatchService; + + @Autowired + private ExpressionExperimentService expressionExperimentService; + + @Autowired + private AuditEventService auditEventService; + + @Before + public void setUp() { + when( 
expressionExperimentService.thawLiter( any() ) ).thenAnswer( a -> a.getArgument( 0 ) ); + } + + @Test + public void testBatchInfo() { + AuditEventType aet; + AuditEvent ae; + ExpressionExperiment ee; + + // no batch factor, no batch info attempt + ee = new ExpressionExperiment(); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new BatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); + assertTrue( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new SingleBatchDeterminationEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); + assertTrue( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new BatchInformationMissingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + // batch info missing (after 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) + ee = new ExpressionExperiment(); + aet = new BatchInformationMissingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header 
file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + // batch info failed (prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) + ee = new ExpressionExperiment(); + aet = new FailedBatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + // has batch information, but it's got some issues + ee = new ExpressionExperiment(); + aet = new FailedBatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Invalid lane for sample GSM...", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java index 42a7d7f101..9c605b495f 
100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java @@ -14,10 +14,6 @@ import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchService; -import ubic.gemma.model.common.auditAndSecurity.AuditAction; -import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.*; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; @@ -32,12 +28,9 @@ import ubic.gemma.persistence.util.Filters; import java.util.Collections; -import java.util.Date; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.*; /** @@ -165,12 +158,9 @@ public AccessDecisionManager accessDecisionManager() { @Autowired private OntologyService ontologyService; - @Autowired - private AuditEventService auditEventService; - @After public void tearDown() { - reset( ontologyService, auditEventService ); + reset( ontologyService ); } @Test @@ -207,61 +197,4 @@ public void testGetAnnotationsUsageFrequencyWithFilters() throws TimeoutExceptio verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } - - @Test - public void testBatchInfo() { - AuditEventType aet; - AuditEvent ae; - ExpressionExperiment ee; - - // no 
batch factor, no batch info attempt - ee = new ExpressionExperiment(); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - ee = new ExpressionExperiment(); - aet = new BatchInformationFetchingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertTrue( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - ee = new ExpressionExperiment(); - aet = new SingleBatchDeterminationEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertTrue( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - ee = new ExpressionExperiment(); - aet = new BatchInformationMissingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - // batch info missing (after 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) - ee = new ExpressionExperiment(); - aet = new BatchInformationMissingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or
resilient to chronic stress Short Name=GSE226576", null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - // batch info failed (prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) - ee = new ExpressionExperiment(); - aet = new FailedBatchInformationFetchingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - // has batch information, but it's got some issues - ee = new ExpressionExperiment(); - aet = new FailedBatchInformationFetchingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Invalid lane for sample GSM...", null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 078f7ed55a..2b93a36240 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ 
-70,9 +70,9 @@ import ubic.gemma.model.genome.gene.GeneValueObject; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService; -import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Filters; import ubic.gemma.persistence.util.Slice; @@ -143,6 +143,8 @@ public class DatasetsWebService { private TaxonArgService taxonArgService; @Autowired private DifferentialExpressionResultService differentialExpressionResultService; + @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; @Context private UriInfo uriInfo; @@ -1005,11 +1007,10 @@ public Response getDatasetDesign( // Params: } /** - * Returns true if the experiment has had batch information successfully filled in. This will be true even if there - * is only one batch. It does not reflect the presence or absence of a batch effect. - * - * @param datasetArg can either be the ExpressionExperiment ID or its short name (e.g. GSE1234). Retrieval by ID - * is more efficient. Only datasets that user has access to will be available. + * Indicate if the experiment has batch information. + *

+ * This does not imply that the batch information is usable. This will be true even if there is only one batch. It + * does not reflect the presence or absence of a batch effect. */ @GET @Path("/{dataset}/hasbatch") @@ -1019,7 +1020,7 @@ public ResponseDataObject getDatasetHasBatchInformation( // Params: @PathParam("dataset") DatasetArg datasetArg // Required ) { ExpressionExperiment ee = datasetArgService.getEntity( datasetArg ); - return respond( expressionExperimentService.checkHasBatchInfo( ee ) ); + return respond( expressionExperimentBatchInformationService.checkHasBatchInfo( ee ) ); } /** diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 2f4f28f6d4..b00b320dfd 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -41,6 +41,7 @@ import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssay.BioAssayService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Filter; import ubic.gemma.persistence.util.Filters; @@ -193,6 +194,11 @@ public DatabaseEntryArgService databaseEntryArgService() { public ExpressionExperimentReportService expressionExperimentReportService() { return mock(); } + + @Bean + public ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService() { + return mock(); + } } @Autowired diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java 
b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java index 6203d69491..7a72290f08 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java @@ -36,6 +36,7 @@ import ubic.gemma.core.analysis.preprocess.MeanVarianceService; import ubic.gemma.core.analysis.preprocess.OutlierDetails; import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.core.analysis.preprocess.svd.SVDService; import ubic.gemma.core.analysis.report.ExpressionExperimentReportService; import ubic.gemma.core.analysis.report.WhatsNew; @@ -124,6 +125,8 @@ public class ExpressionExperimentController { @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; + @Autowired private AuditTrailService auditTrailService; @Autowired private ExpressionExperimentSearchService expressionExperimentSearchService; @@ -611,14 +614,14 @@ public ExpressionExperimentDetailsValueObject loadExpressionExperimentDetails( L public void recalculateBatchConfound( Long id ) { ExpressionExperiment ee = getExperimentById( id, false ); - ee.setBatchConfound( expressionExperimentService.getBatchConfound( ee ) ); + ee.setBatchConfound( expressionExperimentBatchInformationService.getBatchConfound( ee ) ); expressionExperimentService.update( ee ); } public void recalculateBatchEffect( Long id ) { ExpressionExperiment ee = getExperimentById( id, false ); - ee.setBatchEffect( expressionExperimentService.getBatchEffect( ee ) ); - ee.setBatchEffectStatistics( expressionExperimentService.getBatchEffectStatistics( ee ) ); + ee.setBatchEffect( 
expressionExperimentBatchInformationService.getBatchEffect( ee ) ); + ee.setBatchEffectStatistics( expressionExperimentBatchInformationService.getBatchEffectStatistics( ee ) ); expressionExperimentService.update( ee ); } @@ -1167,13 +1170,13 @@ private int numOutliersRemoved( ExpressionExperiment ee ) { * @param finalResult result */ private void setBatchInfo( ExpressionExperimentDetailsValueObject finalResult, ExpressionExperiment ee ) { - boolean hasUsableBatchInformation = expressionExperimentService.checkHasUsableBatchInfo( ee ); + boolean hasUsableBatchInformation = expressionExperimentBatchInformationService.checkHasUsableBatchInfo( ee ); finalResult.setHasBatchInformation( hasUsableBatchInformation ); if ( hasUsableBatchInformation ) { - finalResult.setBatchConfound( expressionExperimentService.getBatchConfound( ee ) ); + finalResult.setBatchConfound( expressionExperimentBatchInformationService.getBatchConfound( ee ) ); } - finalResult.setBatchEffect( expressionExperimentService.getBatchEffect( ee ).name() ); - finalResult.setBatchEffectStatistics( expressionExperimentService.getBatchEffectStatistics( ee ) ); + finalResult.setBatchEffect( expressionExperimentBatchInformationService.getBatchEffect( ee ).name() ); + finalResult.setBatchEffectStatistics( expressionExperimentBatchInformationService.getBatchEffectStatistics( ee ) ); } /** From c1ec95695cbfb2902358ea98750d92e944e5a0b5 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 20 Jun 2024 11:31:53 -0700 Subject: [PATCH 49/81] Refactor batch-related operations in a separate service --- .../links/LinkAnalysisServiceImpl.java | 6 +- .../BatchInfoPopulationServiceImpl.java | 7 +- ...nExperimentBatchCorrectionServiceImpl.java | 14 +- ...sionExperimentBatchInformationService.java | 55 ++++ ...ExperimentBatchInformationServiceImpl.java | 290 ++++++++++++++++++ ...ExpressionExperimentReportServiceImpl.java | 12 +- .../ExpressionDataFileServiceImpl.java | 9 +- .../ExpressionExperimentService.java | 
50 --- .../ExpressionExperimentServiceImpl.java | 259 ---------------- .../experiment/GeeqServiceImpl.java | 12 +- .../RNASeqBatchInfoPopulationTest.java | 11 +- .../ExpressionDataFileServiceTest.java | 12 +- ...ExperimentBatchInformationServiceTest.java | 127 ++++++++ .../ExpressionExperimentServiceTest.java | 69 +---- .../ubic/gemma/rest/DatasetsWebService.java | 15 +- .../gemma/rest/DatasetsWebServiceTest.java | 6 + .../ExpressionExperimentController.java | 17 +- 17 files changed, 548 insertions(+), 423 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java create mode 100644 gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java create mode 100644 gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java index ac6d01ec82..49e0694a2e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java @@ -59,6 +59,7 @@ import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import java.io.IOException; @@ -102,6 +103,9 @@ public 
class LinkAnalysisServiceImpl implements LinkAnalysisService { @Autowired private ProcessedExpressionDataVectorService processedExpressionDataVectorService; + @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; + @Override public LinkAnalysis process( ExpressionExperiment ee, FilterConfig filterConfig, LinkAnalysisConfig linkAnalysisConfig ) { @@ -441,7 +445,7 @@ private void qcCheck( LinkAnalysisConfig config, ExpressionExperiment ee ) throw } if ( config.isCheckForBatchEffect() ) { - BatchEffectDetails batchEffect = eeService.getBatchEffectDetails( ee ); + BatchEffectDetails batchEffect = expressionExperimentBatchInformationService.getBatchEffectDetails( ee ); if ( batchEffect.getDataWasBatchCorrected() ) { LinkAnalysisServiceImpl.log.info( "Data are batch-corrected" ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java index 7460a26425..46a378df68 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchInfoPopulationServiceImpl.java @@ -91,19 +91,18 @@ public static boolean isBatchFactor( ExperimentalFactor ef ) { @Autowired private AuditEventService auditEventService; - @Autowired private AuditTrailService auditTrailService; @Autowired private BatchInfoPopulationHelperService batchInfoPopulationHelperService = null; @Autowired private BioAssayService bioAssayService; - @Autowired private ExperimentalFactorService experimentalFactorService = null; - @Autowired private ExpressionExperimentService expressionExperimentService = null; + @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; @Override @Transactional @@ -342,7 
+341,7 @@ private File locateFASTQheadersForBatchInfo( String accession ) { private boolean needToRun( ExpressionExperiment ee, boolean rnaSeq ) { if ( rnaSeq ) { - return !expressionExperimentService.checkHasBatchInfo( ee ); + return !expressionExperimentBatchInformationService.checkHasBatchInfo( ee ); } if ( ee.getAccession() == null || StringUtils.isBlank( ee.getAccession().getAccession() ) ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java index a4d64a7db4..304ab1bb1c 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceImpl.java @@ -26,7 +26,6 @@ import ubic.basecode.math.MatrixStats; import ubic.basecode.util.FileTools; import ubic.gemma.core.analysis.expression.diff.LinearModelAnalyzer; -import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.quantitationtype.QuantitationType; @@ -35,10 +34,7 @@ import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.designElement.CompositeSequence; -import ubic.gemma.model.expression.experiment.BatchEffectType; -import ubic.gemma.model.expression.experiment.ExperimentalFactor; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.model.expression.experiment.FactorValue; +import ubic.gemma.model.expression.experiment.*; import 
ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; @@ -60,9 +56,13 @@ public class ExpressionExperimentBatchCorrectionServiceImpl implements Expressio public static final String COLLECTION_OF_MATERIAL_URI = "http://www.ebi.ac.uk/efo/EFO_0005066"; public static final String DE_EXCLUDE_URI = "http://gemma.msl.ubc.ca/ont/TGEMO_00014"; public static final String DE_INCLUDE_URI = "http://gemma.msl.ubc.ca/ont/TGEMO_00013"; + @Autowired private ExpressionExperimentService expressionExperimentService; + @Autowired + private ExpressionExperimentBatchInformationService eeBatchService; + @Autowired private ProcessedExpressionDataVectorService processedExpressionDataVectorService; @@ -75,7 +75,7 @@ public boolean checkCorrectability( ExpressionExperiment ee ) { return false; } - BatchEffectType bet = expressionExperimentService.getBatchEffect( ee ); + BatchEffectType bet = eeBatchService.getBatchEffect( ee ); if ( BatchEffectType.NO_BATCH_EFFECT_SUCCESS.equals( bet ) || BatchEffectType.SINGLE_BATCH_SUCCESS.equals( bet ) ) { ExpressionExperimentBatchCorrectionServiceImpl.log.info( "Experiment does not require batch correction as " + "batch effect is negligible or it's a single batch: " + ee ); @@ -87,7 +87,7 @@ public boolean checkCorrectability( ExpressionExperiment ee ) { return false; } - String bConf = expressionExperimentService.getBatchConfound( ee ); + String bConf = eeBatchService.getBatchConfound( ee ); if ( bConf != null ) { // we used to let force override this, but that behavior is undesirable: if there is a confound, we don't batch correct ExpressionExperimentBatchCorrectionServiceImpl.log .info( "Experiment cannot be batch corrected due to a confound: " + bConf ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java 
b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java new file mode 100644 index 0000000000..9360674f78 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationService.java @@ -0,0 +1,55 @@ +package ubic.gemma.core.analysis.preprocess.batcheffects; + +import ubic.gemma.model.expression.experiment.BatchEffectType; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; + +import javax.annotation.Nullable; + +/** + * Provides status of batch information for datasets. + */ +public interface ExpressionExperimentBatchInformationService { + + /** + * Check if the given experiment has batch information. + *

+ * This does not imply that the batch information is usable or valid. Use {@link #checkHasUsableBatchInfo(ExpressionExperiment)} + * for that purpose. + */ + boolean checkHasBatchInfo( ExpressionExperiment ee ); + + /** + * Check if the given experiment has usable batch information. + */ + boolean checkHasUsableBatchInfo( ExpressionExperiment ee ); + + /** + * Checks the experiment for a batch confound. + * + * @param ee the experiment to check. + * @return a string describing the batch confound, or null if there was no batch confound.[FIXME: String return value is unsafe] + */ + @Nullable + String getBatchConfound( ExpressionExperiment ee ); + + /** + * Obtain the full batch effect details of a given experiment. + * @param ee experiment + * @return details for the principal component most associated with batches (even if it isn't "significant"). Note + * that we don't look at every component, just the first few. + */ + BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ); + + /** + * Obtain a {@link BatchEffectType} describing the batch effect state of the given experiment. + * @param ee the experiment to get the batch effect for. + */ + BatchEffectType getBatchEffect( ExpressionExperiment ee ); + + /** + * Obtain a string describing the summary statistics of a batch effect is present in the given experiment. 
+ * @return summary statistics or null if there is no batch effect + */ + @Nullable + String getBatchEffectStatistics( ExpressionExperiment ee ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java new file mode 100644 index 0000000000..a565300166 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java @@ -0,0 +1,290 @@ +package ubic.gemma.core.analysis.preprocess.batcheffects; + +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import ubic.gemma.core.analysis.preprocess.svd.SVDService; +import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; +import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.FailedBatchInformationFetchingEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.SingleBatchDeterminationEvent; +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.experiment.BatchEffectType; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet; +import 
ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; + +import java.util.*; + +@Service +@CommonsLog +public class ExpressionExperimentBatchInformationServiceImpl implements ExpressionExperimentBatchInformationService { + + private static final double BATCH_CONFOUND_THRESHOLD = 0.01; + private static final double BATCH_EFFECT_THRESHOLD = 0.01; + + @Autowired + private ExpressionExperimentService expressionExperimentService; + @Autowired + private SVDService svdService; + @Autowired + private AuditEventService auditEventService; + + @Override + @Transactional(readOnly = true) + public boolean checkHasBatchInfo( ExpressionExperiment ee ) { + if ( hasBatchFactor( ee ) ) { + return true; + } + + AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + + if ( lastBatchInfoEvent == null ) + return false; + + // prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b, cases of missing batch information was incorrectly typed + // see https://github.com/PavlidisLab/Gemma/issues/1155 for details + if ( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent + && lastBatchInfoEvent.getNote() != null && lastBatchInfoEvent.getNote().contains( "No header file for" ) ) { + return false; + } + + return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent; + } + + @Override + @Transactional(readOnly = true) + public boolean checkHasUsableBatchInfo( ExpressionExperiment ee ) { + if ( hasBatchFactor( ee ) ) { + return true; + } + + AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + + if ( lastBatchInfoEvent == null ) + return false; + + return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent + && !( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent ); + } + + @Override + 
@Transactional(readOnly = true) + public String getBatchConfound( ExpressionExperiment ee ) { + ee = expressionExperimentService.thawBioAssays( ee ); + + if ( !this.checkHasUsableBatchInfo( ee ) ) { + log.info( "Experiment has no usable batch information, cannot check for confound: " + ee ); + return null; + } + + Collection confounds; + try { + confounds = BatchConfoundUtils.test( ee ); + } catch ( NotStrictlyPositiveException e ) { + log.error( String.format( "Batch confound test for %s threw a NonStrictlyPositiveException! Returning null.", ee ), e ); + return null; + } + + StringBuilder result = new StringBuilder(); + // Confounds have to be sorted in order to always get the same string + List listConfounds = new ArrayList<>( confounds ); + listConfounds.sort( Comparator.comparing( BatchConfound::toString ) ); + + for ( BatchConfound c : listConfounds ) { + if ( c.getP() < BATCH_CONFOUND_THRESHOLD ) { + String factorName = c.getEf().getName(); + if ( result.toString().isEmpty() ) { + result.append( + "One or more factors were confounded with batches in the full design; batch correction was not performed. " + + "Analyses may not be affected if performed on non-confounded subsets. Factor(s) confounded were: " ); + } else { + result.append( ", " ); + } + result.append( factorName ); + } + } + + // Now check subsets, if relevant. + if ( !listConfounds.isEmpty() && gemma.gsec.util.SecurityUtil.isUserAdmin() ) { + Collection subSets = expressionExperimentService.getSubSets( ee ); + if ( !subSets.isEmpty() ) { + for ( ExpressionExperimentSubSet subset : subSets ) { + try { + confounds = BatchConfoundUtils.test( subset ); + for ( BatchConfound c : confounds ) { + if ( c.getP() < BATCH_CONFOUND_THRESHOLD ) { + result.append( "

Confound still exists for " + c.getEf().getName() + " in " + subset ); + } + } + } catch ( NotStrictlyPositiveException e ) { + + } + } + } + } + + return StringUtils.stripToNull( result.toString() ); + } + + @Override + @Transactional(readOnly = true) + public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) { + ee = expressionExperimentService.thawLiter( ee ); + + BatchEffectDetails details = new BatchEffectDetails( this.checkBatchFetchStatus( ee ), + this.hasBeenBatchCorrected( ee ), this.checkIfSingleBatch( ee ) ); + + // if missing or failed, we can't compute a P-value + if ( !details.hasBatchInformation() || details.hasProblematicBatchInformation() ) { + return details; + } + + // we can't compute a P-value for a single batch + if ( details.isSingleBatch() ) { + return details; + } + + for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { + if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { + SVDValueObject svd = svdService.getSvdFactorAnalysis( ee.getId() ); + if ( svd == null ) { + log.warn( "SVD was null for " + ef + ", can't compute batch effect statistics." ); + break; + } + + // Use the "date run" information as a first pass to decide if there is a batch association. + // This won't always be present. + double minP = 1.0; + if ( svd.getDatePvals() != null ) { + for ( Integer component : svd.getDatePvals().keySet() ) { + Double pVal = svd.getDatePvals().get( component ); + if ( pVal != null && pVal < minP ) { + details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); + minP = pVal; + } + } + } + + // we can override the date-based p-value with the factor-based p-value if it is lower. + // The reason to do this is it can be underpowered. The date-based one is more sensitive. 
+ for ( Integer component : svd.getFactorPvals().keySet() ) { + Map cmpEffects = svd.getFactorPvals().get( component ); + + // could use the effect size instead of the p-values (or in addition) + //Map cmpEffectSizes = svd.getFactorCorrelations().get( component ); + + Double pVal = cmpEffects.get( ef.getId() ); + if ( pVal != null && pVal < minP ) { + details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); + minP = pVal; + } + + } + return details; + } + } + + log.warn( String.format( "No suitable batch factor was found for %s to obtain batch effect statistics.", ee ) ); + + return details; + } + + @Override + @Transactional(readOnly = true) + public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { + BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); + BatchEffectDetails.BatchEffectStatistics batchEffectStatistics = beDetails.getBatchEffectStatistics(); + + if ( beDetails.getHasSingletonBatches() ) { + return BatchEffectType.SINGLETON_BATCHES_FAILURE; + } else if ( beDetails.getHasUninformativeBatchInformation() ) { + return BatchEffectType.UNINFORMATIVE_HEADERS_FAILURE; + } else if ( !beDetails.hasBatchInformation() ) { + return BatchEffectType.NO_BATCH_INFO; + } else if ( beDetails.hasProblematicBatchInformation() ) { + return BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE; + } else if ( beDetails.isSingleBatch() ) { + return BatchEffectType.SINGLE_BATCH_SUCCESS; + } else if ( beDetails.getDataWasBatchCorrected() ) { + // Checked for in ExpressionExperimentDetails.js::renderStatus() + return BatchEffectType.BATCH_CORRECTED_SUCCESS; + } else { + if ( batchEffectStatistics == null ) { + return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; + } else if ( batchEffectStatistics.getPvalue() < BATCH_EFFECT_THRESHOLD ) { + // this means there was a batch effect but we couldn't correct it + return BatchEffectType.BATCH_EFFECT_FAILURE; + } else { + return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; + } + } + } + + 
@Override + @Transactional(readOnly = true) + public String getBatchEffectStatistics( ExpressionExperiment ee ) { + BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); + if ( beDetails.getBatchEffectStatistics() != null ) { + return String.format( "This data set may have a batch artifact (PC %d), p=%.5g", + beDetails.getBatchEffectStatistics().getComponent(), + beDetails.getBatchEffectStatistics().getPvalue() ); + } + return null; + } + + private boolean checkIfSingleBatch( ExpressionExperiment ee ) { + AuditEvent ev = this.auditEventService.getLastEvent( ee, BatchInformationFetchingEvent.class ); + if ( ev == null ) return false; + + if ( ev.getEventType() instanceof SingleBatchDeterminationEvent ) { + return true; + } + + // address cases that were run prior to having the SingleBatchDeterminationEvent type. + if ( ev.getNote() != null && ( ev.getNote().startsWith( "1 batch" ) || ev.getNote().startsWith( "AffyScanDateExtractor; 0 batches" ) ) ) { + return true; + } + + return false; + } + + /** + * Retrieve a batch information event that summarizes the state of batch information. + */ + private BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { + if ( hasBatchFactor( ee ) ) { + return new BatchInformationFetchingEvent(); + } + AuditEvent ev = auditEventService.getLastEvent( ee, BatchInformationEvent.class ); + return ev != null ? 
( BatchInformationEvent ) ev.getEventType() : null; + } + + private boolean hasBatchFactor( ExpressionExperiment ee ) { + ee = expressionExperimentService.thawLiter( ee ); + if ( ee.getExperimentalDesign() != null ) { + for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { + if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { + return true; + } + } + } + return false; + } + + private boolean hasBeenBatchCorrected( ExpressionExperiment ee ) { + for ( QuantitationType qt : ee.getQuantitationTypes() ) { + if ( qt.getIsBatchCorrected() ) { + return true; + } + } + return false; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java index f0bd74e9ac..3dbe8ff14e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java @@ -30,10 +30,11 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Propagation; import org.springframework.transaction.annotation.Transactional; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.core.visualization.ExperimentalDesignVisualizationService; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; -import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.eventType.*; import ubic.gemma.model.expression.experiment.BatchEffectType; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -42,7 +43,6 @@ import 
ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; -import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.EntityUtils; @@ -88,7 +88,7 @@ public class ExpressionExperimentReportServiceImpl implements ExpressionExperime @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired - private ProcessedExpressionDataVectorService processedExpressionDataVectorService; + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; @Autowired private BeanFactory beanFactory; @@ -421,10 +421,10 @@ public void recalculateBatchInfo() { @Transactional public void recalculateExperimentBatchInfo( ExpressionExperiment ee ) { ee = expressionExperimentService.thaw( ee ); - BatchEffectType effect = expressionExperimentService.getBatchEffect( ee ); - String effectStatistics = expressionExperimentService.getBatchEffectStatistics( ee ); + BatchEffectType effect = expressionExperimentBatchInformationService.getBatchEffect( ee ); + String effectStatistics = expressionExperimentBatchInformationService.getBatchEffectStatistics( ee ); String effectSummary = effectStatistics != null ? effectStatistics : effect.name(); - String confound = expressionExperimentService.getBatchConfound( ee ); + String confound = expressionExperimentBatchInformationService.getBatchConfound( ee ); String confoundSummary = confound != null ? 
confound : ""; if ( !Objects.equals( confound, ee.getBatchConfound() ) ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java index 9355a25d01..691175f174 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceImpl.java @@ -32,11 +32,11 @@ import ubic.gemma.core.analysis.preprocess.ExpressionDataMatrixBuilder; import ubic.gemma.core.analysis.preprocess.filter.FilterConfig; import ubic.gemma.core.analysis.preprocess.filter.FilteringException; +import ubic.gemma.core.config.Settings; import ubic.gemma.core.datastructure.matrix.ExperimentalDesignWriter; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrix; import ubic.gemma.core.datastructure.matrix.MatrixWriter; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; import ubic.gemma.model.analysis.expression.diff.ContrastResult; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult; @@ -53,10 +53,11 @@ import ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.RawAndProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import 
ubic.gemma.persistence.util.DifferentialExpressionAnalysisResultComparator; import ubic.gemma.persistence.util.EntityUtils; -import ubic.gemma.core.config.Settings; import javax.annotation.Nullable; import javax.annotation.ParametersAreNonnullByDefault; @@ -104,6 +105,8 @@ private static ExpressionExperiment experimentForBioAssaySet( BioAssaySet bas ) @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; + @Autowired private CoexpressionService gene2geneCoexpressionService = null; @Autowired private RawAndProcessedExpressionDataVectorService rawAndProcessedExpressionDataVectorService; @@ -1106,7 +1109,7 @@ private String makeDiffExpressionResultSetFileHeader( ExpressionAnalysisResultSe } } - String batchConf = expressionExperimentService.getBatchConfound( ee ); + String batchConf = expressionExperimentBatchInformationService.getBatchConfound( ee ); if ( batchConf != null ) { buf.append( "# !!! 
Warning, this dataset has a batch confound with the factors analysed\n" ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 40e57df5ff..ee4ea37e9b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -24,7 +24,6 @@ import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; import ubic.gemma.core.search.SearchException; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationEvent; import ubic.gemma.model.common.description.AnnotationValueObject; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.Characteristic; @@ -152,25 +151,6 @@ public interface ExpressionExperimentService @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_COLLECTION_READ" }) List browse( int start, int limit ); - /** - * Check if the given experiment has batch information. - *

- * This does not imply that the batch information is usable or valid. Use {@link #checkHasUsableBatchInfo(ExpressionExperiment)} - * for that purpose. - */ - boolean checkHasBatchInfo( ExpressionExperiment ee ); - - /** - * Check if the given experiment has usable batch information. - */ - boolean checkHasUsableBatchInfo( ExpressionExperiment ee ); - - /** - * Retrieve a batch information event that summarizes the state of batch information. - */ - @Nullable - BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ); - /** * returns ids of search results. * @@ -456,36 +436,6 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { */ Map getTaxaUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ); - /** - * Checks the experiment for a batch confound. - * - * @param ee the experiment to check. - * @return a string describing the batch confound, or null if there was no batch confound.[FIXME: String return value is unsafe] - */ - @Nullable - String getBatchConfound( ExpressionExperiment ee ); - - /** - * Obtain the full batch effect details of a given experiment. - * @param ee experiment - * @return details for the principal component most associated with batches (even if it isn't "significant"). Note - * that we don't look at every component, just the first few. - */ - BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ); - - /** - * Obtain a {@link BatchEffectType} describing the batch effect state of the given experiment. - * @param ee the experiment to get the batch effect for. - */ - BatchEffectType getBatchEffect( ExpressionExperiment ee ); - - /** - * Obtain a string describing the summary statistics of a batch effect is present in the given experiment. - * @return summary statistics or null if there is no batch effect - */ - @Nullable - String getBatchEffectStatistics( ExpressionExperiment ee ); - /** * @param expressionExperiment experiment * @return the BioAssayDimensions for the study. 
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 2c8f539a4e..c1ce5fc051 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -23,7 +23,6 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.commons.math3.exception.NotStrictlyPositiveException; import org.hibernate.CacheMode; import org.hibernate.Hibernate; import org.springframework.beans.factory.annotation.Autowired; @@ -34,12 +33,7 @@ import org.springframework.util.Assert; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.model.OntologyTermSimple; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchConfound; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchConfoundUtils; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; -import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationServiceImpl; import ubic.gemma.core.analysis.preprocess.svd.SVDService; -import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; @@ -101,9 +95,6 @@ public class ExpressionExperimentServiceImpl extends AbstractFilteringVoEnabledService implements ExpressionExperimentService { - private static final double BATCH_CONFOUND_THRESHOLD = 0.01; - private static final double BATCH_EFFECT_THRESHOLD = 0.01; - private final ExpressionExperimentDao expressionExperimentDao; @Autowired @@ -333,66 +324,6 @@ public 
List browse( int start, int limit ) { return this.expressionExperimentDao.browse( start, limit ); } - @Override - @Transactional(readOnly = true) - public boolean checkHasBatchInfo( ExpressionExperiment ee ) { - if ( hasBatchFactor( ee ) ) { - return true; - } - - AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - - if ( lastBatchInfoEvent == null ) - return false; - - // prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b, cases of missing batch information was incorrectly typed - // see https://github.com/PavlidisLab/Gemma/issues/1155 for details - if ( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent - && lastBatchInfoEvent.getNote() != null && lastBatchInfoEvent.getNote().contains( "No header file for" ) ) { - return false; - } - - return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent; - } - - @Override - @Transactional(readOnly = true) - public boolean checkHasUsableBatchInfo( ExpressionExperiment ee ) { - if ( hasBatchFactor( ee ) ) { - return true; - } - - AuditEvent lastBatchInfoEvent = this.auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - - if ( lastBatchInfoEvent == null ) - return false; - - return lastBatchInfoEvent.getEventType() instanceof BatchInformationFetchingEvent - && !( lastBatchInfoEvent.getEventType() instanceof FailedBatchInformationFetchingEvent ); - } - - @Override - @Transactional(readOnly = true) - public BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { - if ( hasBatchFactor( ee ) ) { - return new BatchInformationFetchingEvent(); - } - AuditEvent ev = auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - return ev != null ? 
( BatchInformationEvent ) ev.getEventType() : null; - } - - private boolean hasBatchFactor( ExpressionExperiment ee ) { - ee = ensureInSession( ee ); - if ( ee.getExperimentalDesign() != null ) { - for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { - if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { - return true; - } - } - } - return false; - } - /** * returns ids of search results * @@ -1060,187 +991,6 @@ public Map getTaxaUsageFrequency( @Nullable Filters filters, @Nulla } } - @Override - @Transactional(readOnly = true) - public String getBatchConfound( ExpressionExperiment ee ) { - ee = this.thawBioAssays( ee ); - - if ( !this.checkHasUsableBatchInfo( ee ) ) { - log.info( "Experiment has no usable batch information, cannot check for confound: " + ee ); - return null; - } - - Collection confounds; - try { - confounds = BatchConfoundUtils.test( ee ); - } catch ( NotStrictlyPositiveException e ) { - AbstractService.log.error( String.format( "Batch confound test for %s threw a NonStrictlyPositiveException! Returning null.", ee ), e ); - return null; - } - - StringBuilder result = new StringBuilder(); - // Confounds have to be sorted in order to always get the same string - List listConfounds = new ArrayList<>( confounds ); - listConfounds.sort( Comparator.comparing( BatchConfound::toString ) ); - - for ( BatchConfound c : listConfounds ) { - if ( c.getP() < ExpressionExperimentServiceImpl.BATCH_CONFOUND_THRESHOLD ) { - String factorName = c.getEf().getName(); - if ( result.toString().isEmpty() ) { - result.append( - "One or more factors were confounded with batches in the full design; batch correction was not performed. " - + "Analyses may not be affected if performed on non-confounded subsets. Factor(s) confounded were: " ); - } else { - result.append( ", " ); - } - result.append( factorName ); - } - } - - // Now check subsets, if relevant. 
- if ( !listConfounds.isEmpty() && gemma.gsec.util.SecurityUtil.isUserAdmin() ) { - Collection subSets = this.getSubSets( ee ); - if ( !subSets.isEmpty() ) { - for ( ExpressionExperimentSubSet subset : subSets ) { - try { - confounds = BatchConfoundUtils.test( subset ); - for ( BatchConfound c : confounds ) { - if ( c.getP() < ExpressionExperimentServiceImpl.BATCH_CONFOUND_THRESHOLD ) { - result.append( "

Confound still exists for " + c.getEf().getName() + " in " + subset ); - } - } - } catch ( NotStrictlyPositiveException e ) { - - } - } - } - } - - return StringUtils.stripToNull( result.toString() ); - } - - private boolean checkIfSingleBatch( ExpressionExperiment ee ) { - AuditEvent ev = this.auditEventService.getLastEvent( ee, BatchInformationFetchingEvent.class ); - if ( ev == null ) return false; - - if ( ev.getEventType() instanceof SingleBatchDeterminationEvent ) { - return true; - } - - // address cases that were run prior to having the SingleBatchDeterminationEvent type. - if ( ev.getNote() != null && ( ev.getNote().startsWith( "1 batch" ) || ev.getNote().startsWith( "AffyScanDateExtractor; 0 batches" ) ) ) { - return true; - } - - return false; - } - - @Override - @Transactional(readOnly = true) - public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) { - ee = this.thawLiter( ee ); - - BatchEffectDetails details = new BatchEffectDetails( this.checkBatchFetchStatus( ee ), - this.getHasBeenBatchCorrected( ee ), this.checkIfSingleBatch( ee ) ); - - // if missing or failed, we can't compute a P-value - if ( !details.hasBatchInformation() || details.hasProblematicBatchInformation() ) { - return details; - } - - // we can't compute a P-value for a single batch - if ( details.isSingleBatch() ) { - return details; - } - - for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { - if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { - SVDValueObject svd = svdService.getSvdFactorAnalysis( ee.getId() ); - if ( svd == null ) { - log.warn( "SVD was null for " + ef + ", can't compute batch effect statistics." ); - break; - } - - // Use the "date run" information as a first pass to decide if there is a batch association. - // This won't always be present. 
- double minP = 1.0; - if ( svd.getDatePvals() != null ) { - for ( Integer component : svd.getDatePvals().keySet() ) { - Double pVal = svd.getDatePvals().get( component ); - if ( pVal != null && pVal < minP ) { - details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); - minP = pVal; - } - } - } - - // we can override the date-based p-value with the factor-based p-value if it is lower. - // The reason to do this is it can be underpowered. The date-based one is more sensitive. - for ( Integer component : svd.getFactorPvals().keySet() ) { - Map cmpEffects = svd.getFactorPvals().get( component ); - - // could use the effect size instead of the p-values (or in addition) - //Map cmpEffectSizes = svd.getFactorCorrelations().get( component ); - - Double pVal = cmpEffects.get( ef.getId() ); - if ( pVal != null && pVal < minP ) { - details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); - minP = pVal; - } - - } - return details; - } - } - - log.warn( String.format( "No suitable batch factor was found for %s to obtain batch effect statistics.", ee ) ); - - return details; - } - - @Override - @Transactional(readOnly = true) - public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { - BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); - BatchEffectDetails.BatchEffectStatistics batchEffectStatistics = beDetails.getBatchEffectStatistics(); - - if ( beDetails.getHasSingletonBatches() ) { - return BatchEffectType.SINGLETON_BATCHES_FAILURE; - } else if ( beDetails.getHasUninformativeBatchInformation() ) { - return BatchEffectType.UNINFORMATIVE_HEADERS_FAILURE; - } else if ( !beDetails.hasBatchInformation() ) { - return BatchEffectType.NO_BATCH_INFO; - } else if ( beDetails.hasProblematicBatchInformation() ) { - return BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE; - } else if ( beDetails.isSingleBatch() ) { - return BatchEffectType.SINGLE_BATCH_SUCCESS; - } else if ( 
beDetails.getDataWasBatchCorrected() ) { - // Checked for in ExpressionExperimentDetails.js::renderStatus() - return BatchEffectType.BATCH_CORRECTED_SUCCESS; - } else { - if ( batchEffectStatistics == null ) { - return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; - } else if ( batchEffectStatistics.getPvalue() < ExpressionExperimentServiceImpl.BATCH_EFFECT_THRESHOLD ) { - // this means there was a batch effect but we couldn't correct it - return BatchEffectType.BATCH_EFFECT_FAILURE; - } else { - return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; - } - } - } - - @Nullable - @Override - @Transactional(readOnly = true) - public String getBatchEffectStatistics( ExpressionExperiment ee ) { - BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); - if ( beDetails.getBatchEffectStatistics() != null ) { - return String.format( "This data set may have a batch artifact (PC %d), p=%.5g", - beDetails.getBatchEffectStatistics().getComponent(), - beDetails.getBatchEffectStatistics().getPvalue() ); - } - return null; - } @Override @Transactional(readOnly = true) @@ -1624,15 +1374,6 @@ private Collection getAnnotationsByBioMaterials } - private boolean getHasBeenBatchCorrected( ExpressionExperiment ee ) { - for ( QuantitationType qt : ee.getQuantitationTypes() ) { - if ( qt.getIsBatchCorrected() ) { - return true; - } - } - return false; - } - /** * @param ees experiments * @param type event type diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java index d3d460dbc8..d59b6bed74 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java @@ -31,6 +31,7 @@ import ubic.basecode.math.DescriptiveWithMissing; import 
ubic.gemma.core.analysis.preprocess.OutlierDetectionService; import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.core.analysis.service.ExpressionDataMatrixService; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.model.common.auditAndSecurity.eventType.GeeqEvent; @@ -94,7 +95,9 @@ public class GeeqServiceImpl extends AbstractVoEnabledService experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors(); assertTrue( experimentalFactors.isEmpty() ); assertTrue( auditService.hasEvent( ee, FailedBatchInformationFetchingEvent.class ) ); - assertTrue( this.eeService.checkHasBatchInfo( ee ) ); - assertFalse( this.eeService.checkHasUsableBatchInfo( ee ) ); + assertTrue( this.eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( this.eeBatchService.checkHasUsableBatchInfo( ee ) ); } @Test(expected = FASTQHeadersPresentButNotUsableException.class) diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java index b7a724e96b..0143772ad6 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/service/ExpressionDataFileServiceTest.java @@ -10,15 +10,16 @@ import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; +import ubic.gemma.core.config.Settings; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import 
ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.association.coexpression.CoexpressionService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.RawAndProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; -import ubic.gemma.core.config.Settings; -import ubic.gemma.core.context.TestComponent; import java.io.File; import java.io.IOException; @@ -60,6 +61,11 @@ public ExpressionExperimentService expressionExperimentService() { return mock( ExpressionExperimentService.class ); } + @Bean + public ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService() { + return mock(); + } + @Bean public CoexpressionService gene2geneCoexpressionService() { return mock( CoexpressionService.class ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java new file mode 100644 index 0000000000..604d8ea077 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java @@ -0,0 +1,127 @@ +package ubic.gemma.persistence.service.expression.experiment; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import 
org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationServiceImpl; +import ubic.gemma.core.analysis.preprocess.svd.SVDService; +import ubic.gemma.core.context.TestComponent; +import ubic.gemma.model.common.auditAndSecurity.AuditAction; +import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.*; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; + +import java.util.Date; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +@ContextConfiguration +public class ExpressionExperimentBatchInformationServiceTest extends AbstractJUnit4SpringContextTests { + + @Configuration + @TestComponent + static class ExpressionExperimentBatchInformationServiceTestContextConfiguration { + + @Bean + public ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService() { + return new ExpressionExperimentBatchInformationServiceImpl(); + } + + @Bean + public ExpressionExperimentService expressionExperimentService() { + return mock(); + } + + @Bean + public SVDService svdService() { + return mock(); + } + + @Bean + public AuditEventService auditEventService() { + return mock(); + } + + } + + @Autowired + private ExpressionExperimentBatchInformationService eeBatchService; + + @Autowired + private ExpressionExperimentService expressionExperimentService; + + @Autowired + private AuditEventService auditEventService; + + @Before + public void setUp() { + when( 
expressionExperimentService.thawLiter( any() ) ).thenAnswer( a -> a.getArgument( 0 ) ); + } + + @Test + public void testBatchInfo() { + AuditEventType aet; + AuditEvent ae; + ExpressionExperiment ee; + + // no batch factor, no batch info attempt + ee = new ExpressionExperiment(); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new BatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); + assertTrue( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new SingleBatchDeterminationEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); + assertTrue( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + ee = new ExpressionExperiment(); + aet = new BatchInformationMissingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + // batch info missing (after 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) + ee = new ExpressionExperiment(); + aet = new BatchInformationMissingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header 
file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + // batch info failed (prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) + ee = new ExpressionExperiment(); + aet = new FailedBatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + + // has batch information, but it's got some issues + ee = new ExpressionExperiment(); + aet = new FailedBatchInformationFetchingEvent(); + ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Invalid lane for sample GSM...", null, null, aet ); + when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); + assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); + assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java index 42a7d7f101..9c605b495f 
100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java @@ -14,10 +14,6 @@ import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchService; -import ubic.gemma.model.common.auditAndSecurity.AuditAction; -import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.*; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; @@ -32,12 +28,9 @@ import ubic.gemma.persistence.util.Filters; import java.util.Collections; -import java.util.Date; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.*; /** @@ -165,12 +158,9 @@ public AccessDecisionManager accessDecisionManager() { @Autowired private OntologyService ontologyService; - @Autowired - private AuditEventService auditEventService; - @After public void tearDown() { - reset( ontologyService, auditEventService ); + reset( ontologyService ); } @Test @@ -207,61 +197,4 @@ public void testGetAnnotationsUsageFrequencyWithFilters() throws TimeoutExceptio verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } - - @Test - public void testBatchInfo() { - AuditEventType aet; - AuditEvent ae; - ExpressionExperiment ee; - - // no 
batch factor, no batch info attempt - ee = new ExpressionExperiment(); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - ee = new ExpressionExperiment(); - aet = new BatchInformationFetchingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertTrue( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - ee = new ExpressionExperiment(); - aet = new SingleBatchDeterminationEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertTrue( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - ee = new ExpressionExperiment(); - aet = new BatchInformationMissingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, null, null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - // batch info missing (after 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) - ee = new ExpressionExperiment(); - aet = new BatchInformationMissingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or
resilient to chronic stress Short Name=GSE226576", null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - // batch info failed (prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) - ee = new ExpressionExperiment(); - aet = new FailedBatchInformationFetchingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Error while processing FASTQ headers for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576: No header file for ExpressionExperiment Id=35322 Name=Medial prefrontal cortex transcriptome of mice susceptible or resilient to chronic stress Short Name=GSE226576", null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertFalse( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - - // has batch information, but it's got some issues - ee = new ExpressionExperiment(); - aet = new FailedBatchInformationFetchingEvent(); - ae = AuditEvent.Factory.newInstance( new Date(), AuditAction.UPDATE, "Invalid lane for sample GSM...", null, null, aet ); - when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); - assertTrue( expressionExperimentService.checkHasBatchInfo( ee ) ); - assertFalse( expressionExperimentService.checkHasUsableBatchInfo( ee ) ); - } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 078f7ed55a..2b93a36240 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ 
-70,9 +70,9 @@ import ubic.gemma.model.genome.gene.GeneValueObject; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService; -import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Filters; import ubic.gemma.persistence.util.Slice; @@ -143,6 +143,8 @@ public class DatasetsWebService { private TaxonArgService taxonArgService; @Autowired private DifferentialExpressionResultService differentialExpressionResultService; + @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; @Context private UriInfo uriInfo; @@ -1005,11 +1007,10 @@ public Response getDatasetDesign( // Params: } /** - * Returns true if the experiment has had batch information successfully filled in. This will be true even if there - * is only one batch. It does not reflect the presence or absence of a batch effect. - * - * @param datasetArg can either be the ExpressionExperiment ID or its short name (e.g. GSE1234). Retrieval by ID - * is more efficient. Only datasets that user has access to will be available. + * Indicate if the experiment has batch information. + *

+ * This does not imply that the batch information is usable. This will be true even if there is only one batch. It + * does not reflect the presence or absence of a batch effect. */ @GET @Path("/{dataset}/hasbatch") @@ -1019,7 +1020,7 @@ public ResponseDataObject getDatasetHasBatchInformation( // Params: @PathParam("dataset") DatasetArg datasetArg // Required ) { ExpressionExperiment ee = datasetArgService.getEntity( datasetArg ); - return respond( expressionExperimentService.checkHasBatchInfo( ee ) ); + return respond( expressionExperimentBatchInformationService.checkHasBatchInfo( ee ) ); } /** diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 2f4f28f6d4..b00b320dfd 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -41,6 +41,7 @@ import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.bioAssay.BioAssayService; import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Filter; import ubic.gemma.persistence.util.Filters; @@ -193,6 +194,11 @@ public DatabaseEntryArgService databaseEntryArgService() { public ExpressionExperimentReportService expressionExperimentReportService() { return mock(); } + + @Bean + public ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService() { + return mock(); + } } @Autowired diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java 
b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java index 6203d69491..7a72290f08 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java @@ -36,6 +36,7 @@ import ubic.gemma.core.analysis.preprocess.MeanVarianceService; import ubic.gemma.core.analysis.preprocess.OutlierDetails; import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.core.analysis.preprocess.svd.SVDService; import ubic.gemma.core.analysis.report.ExpressionExperimentReportService; import ubic.gemma.core.analysis.report.WhatsNew; @@ -124,6 +125,8 @@ public class ExpressionExperimentController { @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; + @Autowired private AuditTrailService auditTrailService; @Autowired private ExpressionExperimentSearchService expressionExperimentSearchService; @@ -611,14 +614,14 @@ public ExpressionExperimentDetailsValueObject loadExpressionExperimentDetails( L public void recalculateBatchConfound( Long id ) { ExpressionExperiment ee = getExperimentById( id, false ); - ee.setBatchConfound( expressionExperimentService.getBatchConfound( ee ) ); + ee.setBatchConfound( expressionExperimentBatchInformationService.getBatchConfound( ee ) ); expressionExperimentService.update( ee ); } public void recalculateBatchEffect( Long id ) { ExpressionExperiment ee = getExperimentById( id, false ); - ee.setBatchEffect( expressionExperimentService.getBatchEffect( ee ) ); - ee.setBatchEffectStatistics( expressionExperimentService.getBatchEffectStatistics( ee ) ); + ee.setBatchEffect( 
expressionExperimentBatchInformationService.getBatchEffect( ee ) ); + ee.setBatchEffectStatistics( expressionExperimentBatchInformationService.getBatchEffectStatistics( ee ) ); expressionExperimentService.update( ee ); } @@ -1167,13 +1170,13 @@ private int numOutliersRemoved( ExpressionExperiment ee ) { * @param finalResult result */ private void setBatchInfo( ExpressionExperimentDetailsValueObject finalResult, ExpressionExperiment ee ) { - boolean hasUsableBatchInformation = expressionExperimentService.checkHasUsableBatchInfo( ee ); + boolean hasUsableBatchInformation = expressionExperimentBatchInformationService.checkHasUsableBatchInfo( ee ); finalResult.setHasBatchInformation( hasUsableBatchInformation ); if ( hasUsableBatchInformation ) { - finalResult.setBatchConfound( expressionExperimentService.getBatchConfound( ee ) ); + finalResult.setBatchConfound( expressionExperimentBatchInformationService.getBatchConfound( ee ) ); } - finalResult.setBatchEffect( expressionExperimentService.getBatchEffect( ee ).name() ); - finalResult.setBatchEffectStatistics( expressionExperimentService.getBatchEffectStatistics( ee ) ); + finalResult.setBatchEffect( expressionExperimentBatchInformationService.getBatchEffect( ee ).name() ); + finalResult.setBatchEffectStatistics( expressionExperimentBatchInformationService.getBatchEffectStatistics( ee ) ); } /** From 0b101ed2ae44e35b5332a0800e60391221dfb71c Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 20 Jun 2024 13:59:31 -0700 Subject: [PATCH 50/81] Fix AssumptionViolatedException getting wrapped by a RuntimeException This prevents the test from getting skipped. 
--- .../src/test/java/ubic/gemma/core/util/test/Assumptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java index 3afa0c2acd..0ae77dd7b5 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/Assumptions.java @@ -42,7 +42,7 @@ public static void assumeThatResourceIsAvailable( String url ) { assumeNoException( String.format( "The resource at %s is not available.", url ), e ); } catch ( SSLException e ) { assumeNoException( String.format( "SSL issue attempting to connect to %s.", url ), e ); - } catch ( Exception e ) { + } catch ( IOException e ) { throw new RuntimeException( e ); } finally { if ( con instanceof HttpURLConnection ) { From c2b6ac02d4120219dff8e73b5da55e1f1b968b97 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 18 Apr 2024 11:21:06 -0700 Subject: [PATCH 51/81] rest: Add endpoints for obtaining parent and children terms Add missing test execution listener to JerseyTest. Make tearDown() final in JerseyTest since it's a requirement that it must be done in the parent class. 
--- .../gemma/rest/AnnotationsWebService.java | 63 ++++++++++- .../gemma/rest/AnnotationsWebServiceTest.java | 102 +++++++++++++++--- 2 files changed, 152 insertions(+), 13 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java index f85bca82c8..5bd95a3789 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java @@ -31,7 +31,7 @@ import org.apache.commons.lang3.time.StopWatch; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSearchService; +import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.*; import ubic.gemma.core.search.lucene.LuceneQueryUtils; @@ -41,6 +41,7 @@ import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.common.description.CharacteristicService; +import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSearchService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Filters; import ubic.gemma.persistence.util.Slice; @@ -56,6 +57,8 @@ import java.net.URI; import java.util.*; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; import static ubic.gemma.rest.util.Responders.paginate; import static ubic.gemma.rest.util.Responders.respond; @@ -107,6 +110,64 @@ public AnnotationsWebService( OntologyService ontologyService, SearchService sea this.taxonArgService = taxonArgService; } + /** + * Obtain the parent of a given annotation (see https://www.w3.org/TR/owl-ref/#subClassOf-def).
+ *

+ * This is plural as we might add support for querying multiple annotations at once in the future. + */ + @GET + @Path("/parents") + @Produces(MediaType.APPLICATION_JSON) + @Operation(summary = "Retrieve the parents of the given annotations", + description = "Terms that are returned satisfy the [rdfs:subClassOf](https://www.w3.org/TR/2012/REC-owl2-syntax-20121211/#Subclass_Axioms) or [part_of](http://purl.obolibrary.org/obo/BFO_0000050) relations. When `direct` is set to false, this rule is applied recursively.", + responses = { + @ApiResponse(useReturnTypeSchema = true, content = @Content()), + @ApiResponse(responseCode = "404", description = "No term matched the given URI."), + @ApiResponse(responseCode = "503", description = "Ontology inference timed out.") }) + public List getAnnotationsParents( + @Parameter(description = "Term URI") @QueryParam("uri") String termUri, + @Parameter(description = "Only include direct parents.") @QueryParam("direct") @DefaultValue("false") boolean direct ) { + return getAnnotationsParentsOrChildren( termUri, direct, true ); + } + + /** + * Obtain the children of a given annotation.

+ * This is plural as we might add support for querying multiple annotations at once in the future. + */ + @GET + @Path("/children") + @Produces(MediaType.APPLICATION_JSON) + @Operation(summary = "Retrieve the children of the given annotations", + description = "Terms that are returned satisfy the [inverse of rdfs:subClassOf](https://www.w3.org/TR/2012/REC-owl2-syntax-20121211/#Subclass_Axioms) or [has_part](http://purl.obolibrary.org/obo/BFO_0000051) relations. When `direct` is set to false, this rule is applied recursively.", + responses = { + @ApiResponse(useReturnTypeSchema = true, content = @Content()), + @ApiResponse(responseCode = "404", description = "No term matched the given URI."), + @ApiResponse(responseCode = "503", description = "Ontology inference timed out.") }) + public List getAnnotationsChildren( + @Parameter(description = "Term URI") @QueryParam("uri") String termUri, + @Parameter(description = "Only include direct children.") @QueryParam("direct") @DefaultValue("false") boolean direct ) { + return getAnnotationsParentsOrChildren( termUri, direct, false ); + } + + private List getAnnotationsParentsOrChildren( String termUri, boolean direct, boolean parents ) { + if ( StringUtils.isBlank( termUri ) ) { + throw new BadRequestException( "The 'uri' parameter must not be blank." ); + } + OntologyTerm term = ontologyService.getTerm( termUri ); + if ( term == null ) { + throw new NotFoundException( "No ontology term with URI " + termUri ); + } + try { + return ( parents ? 
ontologyService.getParents( Collections.singleton( term ), direct, true, 30, TimeUnit.SECONDS ) : + ontologyService.getChildren( Collections.singleton( term ), direct, true, 30, TimeUnit.SECONDS ) ).stream() + .map( t -> new AnnotationSearchResultValueObject( t.getLabel(), t.getUri(), null, null ) ) + .collect( Collectors.toList() ); + } catch ( TimeoutException e ) { + throw new ServiceUnavailableException( DateUtils.addSeconds( new Date(), 30 ), e ); + } + } + /** * Does a search for annotation tags based on the given string. * diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java index aa3c225d7c..b00198cfc4 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java @@ -1,25 +1,26 @@ package ubic.gemma.rest; +import io.swagger.v3.oas.models.OpenAPI; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.security.access.AccessDecisionManager; import org.springframework.security.test.context.support.WithMockUser; import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener; -import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestExecutionListeners; -import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; -import org.springframework.test.context.web.WebAppConfiguration; +import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; import 
ubic.gemma.core.context.TestComponent; -import ubic.gemma.persistence.service.genome.gene.GeneService; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchService; +import ubic.gemma.core.util.BuildInfo; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; @@ -29,29 +30,39 @@ import ubic.gemma.persistence.service.expression.bioAssay.BioAssayService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.service.genome.ChromosomeService; +import ubic.gemma.persistence.service.genome.gene.GeneService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; -import ubic.gemma.persistence.util.*; +import ubic.gemma.persistence.util.Filter; +import ubic.gemma.persistence.util.Filters; +import ubic.gemma.persistence.util.Slice; +import ubic.gemma.persistence.util.Sort; +import ubic.gemma.rest.analytics.AnalyticsProvider; +import ubic.gemma.rest.util.BaseJerseyTest; +import ubic.gemma.rest.util.JacksonConfig; import ubic.gemma.rest.util.QueriedAndFilteredAndPaginatedResponseDataObject; import ubic.gemma.rest.util.SortValueObject; import ubic.gemma.rest.util.args.*; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; import java.util.Collections; +import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.*; +import static ubic.gemma.rest.util.Assertions.assertThat; /** * @author poirigui */ -@ActiveProfiles("web") -@WebAppConfiguration @ContextConfiguration -@TestExecutionListeners(WithSecurityContextTestExecutionListener.class) -public class AnnotationsWebServiceTest extends 
AbstractJUnit4SpringContextTests { +@TestExecutionListeners({ WithSecurityContextTestExecutionListener.class }) +public class AnnotationsWebServiceTest extends BaseJerseyTest { @Configuration @TestComponent + @Import(JacksonConfig.class) public static class AnnotationsWebServiceContextConfiguration { @Bean @@ -95,6 +106,26 @@ public AnnotationsWebService annotationsWebService( OntologyService ontologyServ DatasetArgService datasetRestService, TaxonArgService taxonArgService ) { return new AnnotationsWebService( ontologyService, searchService, characteristicService, expressionExperimentService, datasetRestService, taxonArgService ); } + + @Bean + public AnalyticsProvider analyticsProvider() { + return mock(); + } + + @Bean + public AccessDecisionManager accessDecisionManager() { + return mock(); + } + + @Bean + public OpenAPI openAPI() { + return mock(); + } + + @Bean + public BuildInfo buildInfo() { + return mock(); + } } @Autowired @@ -109,16 +140,19 @@ public AnnotationsWebService annotationsWebService( OntologyService ontologyServ @Autowired private ExpressionExperimentService expressionExperimentService; + @Autowired + private OntologyService ontologyService; + @Before - public void setUp() { + public void setUpMocks() { Taxon taxon = Taxon.Factory.newInstance(); taxon.setId( 1L ); when( taxonService.findByCommonName( "human" ) ).thenReturn( taxon ); } @After - public void tearDown() { - reset( searchService, taxonService ); + public void resetMocks() { + reset( searchService, taxonService, ontologyService ); } @Test @@ -163,4 +197,48 @@ public void testSearchTaxonDatasets() throws SearchException, TimeoutException { verify( expressionExperimentService ).loadValueObjects( any( Filters.class ), eq( Sort.by( "ee", "id", Sort.Direction.ASC, "id" ) ), eq( 0 ), eq( 20 ) ); } + @Test + public void testParents() throws TimeoutException { + OntologyTerm term = mock( OntologyTerm.class ); + when( ontologyService.getTerm( "http://example.com/test" ) ).thenReturn( term ); 
+ assertThat( target( "/annotations/parents" ).queryParam( "uri", "http://example.com/test" ).request().get() ) + .hasStatus( Response.Status.OK ) + .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); + verify( ontologyService ).getTerm( "http://example.com/test" ); + verify( ontologyService ).getParents( Collections.singleton( term ), false, true, 30L, TimeUnit.SECONDS ); + } + + @Test + public void testParentsWhenTermIsNotFound() { + assertThat( target( "/annotations/parents" ).queryParam( "uri", "http://example.com/test" ).request().get() ) + .hasStatus( Response.Status.NOT_FOUND ) + .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); + verify( ontologyService ).getTerm( "http://example.com/test" ); + verifyNoMoreInteractions( ontologyService ); + } + + @Test + public void testParentsWhenInferenceTimeout() throws TimeoutException { + OntologyTerm term = mock( OntologyTerm.class ); + when( ontologyService.getTerm( "http://example.com/test" ) ).thenReturn( term ); + when( ontologyService.getParents( any(), anyBoolean(), anyBoolean(), anyLong(), any() ) ).thenThrow( new TimeoutException( "Ontology inference timed out!" 
) ); + assertThat( target( "/annotations/parents" ).queryParam( "uri", "http://example.com/test" ).request().get() ) + .hasStatus( Response.Status.SERVICE_UNAVAILABLE ) + .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ) + .entity() + .hasFieldOrPropertyWithValue( "error.code", 503 ) + .hasFieldOrPropertyWithValue( "error.message", "HTTP 503 Service Unavailable" ); + verify( ontologyService ).getTerm( "http://example.com/test" ); + verify( ontologyService ).getParents( Collections.singleton( term ), false, true, 30L, TimeUnit.SECONDS ); + } + + @Test + public void testChildren() throws TimeoutException { + OntologyTerm term = mock( OntologyTerm.class ); + when( ontologyService.getTerm( "http://example.com/test" ) ).thenReturn( term ); + assertThat( target( "/annotations/children" ).queryParam( "uri", "http://example.com/test" ).request().get() ) + .hasStatus( Response.Status.OK ); + verify( ontologyService ).getTerm( "http://example.com/test" ); + verify( ontologyService ).getChildren( Collections.singleton( term ), false, true, 30L, TimeUnit.SECONDS ); + } } \ No newline at end of file From a8bf734b609beffcab31d9659c47d5ac1d3f4d2c Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 21 Jun 2024 10:27:13 -0700 Subject: [PATCH 52/81] Fix missing batch info service --- .../ExpressionExperimentReportServiceImpl.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java index f0bd74e9ac..3dbe8ff14e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceImpl.java @@ -30,10 +30,11 @@ import org.springframework.stereotype.Service; import 
org.springframework.transaction.annotation.Propagation; import org.springframework.transaction.annotation.Transactional; +import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.core.visualization.ExperimentalDesignVisualizationService; import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject; -import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; +import ubic.gemma.model.common.auditAndSecurity.Auditable; import ubic.gemma.model.common.auditAndSecurity.eventType.*; import ubic.gemma.model.expression.experiment.BatchEffectType; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -42,7 +43,6 @@ import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; -import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.EntityUtils; @@ -88,7 +88,7 @@ public class ExpressionExperimentReportServiceImpl implements ExpressionExperime @Autowired private ExpressionExperimentService expressionExperimentService; @Autowired - private ProcessedExpressionDataVectorService processedExpressionDataVectorService; + private ExpressionExperimentBatchInformationService expressionExperimentBatchInformationService; @Autowired private BeanFactory beanFactory; @@ -421,10 +421,10 @@ public void recalculateBatchInfo() { @Transactional public void recalculateExperimentBatchInfo( ExpressionExperiment ee ) { ee = expressionExperimentService.thaw( ee ); - BatchEffectType effect = expressionExperimentService.getBatchEffect( ee ); - String effectStatistics = 
expressionExperimentService.getBatchEffectStatistics( ee ); + BatchEffectType effect = expressionExperimentBatchInformationService.getBatchEffect( ee ); + String effectStatistics = expressionExperimentBatchInformationService.getBatchEffectStatistics( ee ); String effectSummary = effectStatistics != null ? effectStatistics : effect.name(); - String confound = expressionExperimentService.getBatchConfound( ee ); + String confound = expressionExperimentBatchInformationService.getBatchConfound( ee ); String confoundSummary = confound != null ? confound : ""; if ( !Objects.equals( confound, ee.getBatchConfound() ) ) { From 50c5a3f5aa48502a9670be8b70d89cfc0de4bbcf Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 21 Jun 2024 10:45:57 -0700 Subject: [PATCH 53/81] Fix more edge cases for determining batch effect type Add more tests to cover various failure modes. --- .../links/LinkAnalysisServiceImpl.java | 2 +- .../batcheffects/BatchEffectDetails.java | 99 +++++++++---------- ...ExperimentBatchInformationServiceImpl.java | 99 +++++++++++-------- .../experiment/GeeqServiceImpl.java | 2 +- ...ExperimentBatchInformationServiceTest.java | 70 +++++++++++-- 5 files changed, 172 insertions(+), 100 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java index 49e0694a2e..e7eb1000a3 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java @@ -447,7 +447,7 @@ private void qcCheck( LinkAnalysisConfig config, ExpressionExperiment ee ) throw if ( config.isCheckForBatchEffect() ) { BatchEffectDetails batchEffect = expressionExperimentBatchInformationService.getBatchEffectDetails( ee ); - if ( 
batchEffect.getDataWasBatchCorrected() ) { + if ( batchEffect.dataWasBatchCorrected() ) { LinkAnalysisServiceImpl.log.info( "Data are batch-corrected" ); return; } diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchEffectDetails.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchEffectDetails.java index 4c165e46e3..6fbf130241 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchEffectDetails.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchEffectDetails.java @@ -54,28 +54,12 @@ public double getPvalue() { } } - /** - * Indicate if the batch information is present. - */ private final boolean hasBatchInformation; - /** - * Indicate if the batch information is uninformative. - */ private final boolean hasUninformativeBatchInformation; - /** - * Indicate if the batch information is problematic. - */ private final boolean hasProblematicBatchInformation; - /** - * Indicate if the dataset has singleton batches (i.e. a batch only one sample). - */ private final boolean hasSingletonBatches; - /** - * Indicate if batch correction was performed on the expression data. 
- */ - private final boolean dataWasBatchCorrected; - private final boolean singleBatch; + private final boolean dataWasBatchCorrected; /* if present and suitable, those are filled */ private boolean hasBatchEffectStatistics = false; @@ -84,64 +68,71 @@ public double getPvalue() { private double componentVarianceProportion; public BatchEffectDetails( @Nullable BatchInformationEvent infoEvent, boolean dataWasBatchCorrected, boolean singleBatch ) { - - if ( infoEvent != null && ( BatchInformationMissingEvent.class.isAssignableFrom( infoEvent.getClass() ) ) ) { + if ( infoEvent instanceof BatchInformationFetchingEvent ) { + this.hasBatchInformation = true; + // FIXME hasProblematicBatchInformation should not be assigned when there is no batch information available. + this.hasProblematicBatchInformation = infoEvent instanceof FailedBatchInformationFetchingEvent; + this.hasSingletonBatches = infoEvent instanceof SingletonBatchInvalidEvent; + this.hasUninformativeBatchInformation = infoEvent instanceof UninformativeFASTQHeadersForBatchingEvent; + this.dataWasBatchCorrected = dataWasBatchCorrected; + this.singleBatch = singleBatch; + } else { + // infoEvent is either null or a BatchInformationMissingEvent + Assert.isTrue( infoEvent == null || infoEvent instanceof BatchInformationMissingEvent ); this.hasBatchInformation = false; this.hasProblematicBatchInformation = false; this.hasSingletonBatches = false; this.hasUninformativeBatchInformation = false; this.dataWasBatchCorrected = false; this.singleBatch = false; - this.pvalue = 1.0; - return; - } - - this.hasBatchInformation = infoEvent != null; - if ( infoEvent != null ) { - // FIXME hasProblematicBatchInformation should not be assigned when there is no batch information available. 
- this.hasProblematicBatchInformation = FailedBatchInformationFetchingEvent.class.isAssignableFrom( ( infoEvent.getClass() ) ); - - this.hasSingletonBatches = SingletonBatchInvalidEvent.class.isAssignableFrom( infoEvent.getClass() ); - this.hasUninformativeBatchInformation = UninformativeFASTQHeadersForBatchingEvent.class.isAssignableFrom( infoEvent.getClass() ); - } else { - this.hasProblematicBatchInformation = false; - this.hasSingletonBatches = false; - this.hasUninformativeBatchInformation = false; } - this.dataWasBatchCorrected = dataWasBatchCorrected; - this.singleBatch = singleBatch; - this.pvalue = 1.0; } - public boolean getDataWasBatchCorrected() { - return this.dataWasBatchCorrected; + /** + * Indicate if the batch information is present. + */ + public boolean hasBatchInformation() { + return hasBatchInformation; } - public boolean getHasSingletonBatches() { - return hasSingletonBatches; + /** + * Indicate if the batch information is present, but problematic. + */ + public boolean hasProblematicBatchInformation() { + return hasProblematicBatchInformation; } - public boolean getHasUninformativeBatchInformation() { + /** + * Indicate if the batch information is present, but uninformative. + */ + public boolean hasUninformativeBatchInformation() { return hasUninformativeBatchInformation; } - public boolean hasBatchInformation() { - return hasBatchInformation; - } - - public boolean hasProblematicBatchInformation() { - return hasProblematicBatchInformation; + /** + * Indicate if the dataset has one or more singleton batches (i.e. a batch with only one sample). + */ + public boolean hasSingletonBatches() { + return hasSingletonBatches; } /** - * - * @return true if the experiment was determined to have just one batch, or false for any other state (including we - * don't know) + * Indicate if the experiment was determined to have just one batch, or false for any other state (including we don't know). 
*/ public boolean isSingleBatch() { return singleBatch; } + /** + * Indicate if batch correction was performed on the expression data. + */ + public boolean dataWasBatchCorrected() { + return this.dataWasBatchCorrected; + } + + /** + * Obtain an object describing the batch effect if available. + */ @Nullable public BatchEffectStatistics getBatchEffectStatistics() { if ( hasBatchEffectStatistics ) { @@ -151,6 +142,12 @@ public BatchEffectStatistics getBatchEffectStatistics() { } } + /** + * Set the batch effect statistics. + * @param pVal P-value + * @param i component confounded by the batch + * @param variance variance explained by the component + */ public void setBatchEffectStatistics( double pVal, int i, double variance ) { Assert.isTrue( pVal >= 0 ); Assert.isTrue( pVal <= 1 ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java index a565300166..f89cbd5653 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchInformationServiceImpl.java @@ -9,10 +9,7 @@ import ubic.gemma.core.analysis.preprocess.svd.SVDService; import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.BatchInformationFetchingEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.FailedBatchInformationFetchingEvent; -import ubic.gemma.model.common.auditAndSecurity.eventType.SingleBatchDeterminationEvent; +import ubic.gemma.model.common.auditAndSecurity.eventType.*; import 
ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.experiment.BatchEffectType; import ubic.gemma.model.expression.experiment.ExperimentalFactor; @@ -152,48 +149,56 @@ public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) { return details; } - for ( ExperimentalFactor ef : ee.getExperimentalDesign().getExperimentalFactors() ) { - if ( BatchInfoPopulationServiceImpl.isBatchFactor( ef ) ) { - SVDValueObject svd = svdService.getSvdFactorAnalysis( ee.getId() ); - if ( svd == null ) { - log.warn( "SVD was null for " + ef + ", can't compute batch effect statistics." ); - break; - } - - // Use the "date run" information as a first pass to decide if there is a batch association. - // This won't always be present. - double minP = 1.0; - if ( svd.getDatePvals() != null ) { - for ( Integer component : svd.getDatePvals().keySet() ) { - Double pVal = svd.getDatePvals().get( component ); - if ( pVal != null && pVal < minP ) { - details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); - minP = pVal; - } - } - } + if ( ee.getExperimentalDesign() == null ) { + log.warn( ee + " have batch information, but it does not have an experimental design to determine the batch effect." ); + return details; + } - // we can override the date-based p-value with the factor-based p-value if it is lower. - // The reason to do this is it can be underpowered. The date-based one is more sensitive. 
- for ( Integer component : svd.getFactorPvals().keySet() ) { - Map cmpEffects = svd.getFactorPvals().get( component ); + ExperimentalFactor ef = ee.getExperimentalDesign().getExperimentalFactors() + .stream() + .filter( BatchInfoPopulationServiceImpl::isBatchFactor ) + .findFirst() + .orElse( null ); - // could use the effect size instead of the p-values (or in addition) - //Map cmpEffectSizes = svd.getFactorCorrelations().get( component ); + if ( ef == null ) { + log.warn( String.format( "No suitable batch factor was found for %s to obtain batch effect statistics.", ee ) ); + return details; + } - Double pVal = cmpEffects.get( ef.getId() ); - if ( pVal != null && pVal < minP ) { - details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); - minP = pVal; - } + SVDValueObject svd = svdService.getSvdFactorAnalysis( ee.getId() ); + if ( svd == null ) { + log.warn( "SVD was null for " + ef + ", can't compute batch effect statistics." ); + return details; + } + // Use the "date run" information as a first pass to decide if there is a batch association. + // This won't always be present. + double minP = 1.0; + if ( svd.getDatePvals() != null ) { + for ( Integer component : svd.getDatePvals().keySet() ) { + Double pVal = svd.getDatePvals().get( component ); + if ( pVal != null && pVal < minP ) { + details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); + minP = pVal; } - return details; } } - log.warn( String.format( "No suitable batch factor was found for %s to obtain batch effect statistics.", ee ) ); + // we can override the date-based p-value with the factor-based p-value if it is lower. + // The reason to do this is it can be underpowered. The date-based one is more sensitive. 
+ for ( Integer component : svd.getFactorPvals().keySet() ) { + Map cmpEffects = svd.getFactorPvals().get( component ); + // could use the effect size instead of the p-values (or in addition) + //Map cmpEffectSizes = svd.getFactorCorrelations().get( component ); + + Double pVal = cmpEffects.get( ef.getId() ); + if ( pVal != null && pVal < minP ) { + details.setBatchEffectStatistics( pVal, component + 1, svd.getVariances()[component] ); + minP = pVal; + } + + } return details; } @@ -203,9 +208,9 @@ public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); BatchEffectDetails.BatchEffectStatistics batchEffectStatistics = beDetails.getBatchEffectStatistics(); - if ( beDetails.getHasSingletonBatches() ) { + if ( beDetails.hasSingletonBatches() ) { return BatchEffectType.SINGLETON_BATCHES_FAILURE; - } else if ( beDetails.getHasUninformativeBatchInformation() ) { + } else if ( beDetails.hasUninformativeBatchInformation() ) { return BatchEffectType.UNINFORMATIVE_HEADERS_FAILURE; } else if ( !beDetails.hasBatchInformation() ) { return BatchEffectType.NO_BATCH_INFO; @@ -213,7 +218,7 @@ public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { return BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE; } else if ( beDetails.isSingleBatch() ) { return BatchEffectType.SINGLE_BATCH_SUCCESS; - } else if ( beDetails.getDataWasBatchCorrected() ) { + } else if ( beDetails.dataWasBatchCorrected() ) { // Checked for in ExpressionExperimentDetails.js::renderStatus() return BatchEffectType.BATCH_CORRECTED_SUCCESS; } else { @@ -263,8 +268,20 @@ private BatchInformationEvent checkBatchFetchStatus( ExpressionExperiment ee ) { if ( hasBatchFactor( ee ) ) { return new BatchInformationFetchingEvent(); } + AuditEvent ev = auditEventService.getLastEvent( ee, BatchInformationEvent.class ); - return ev != null ? 
( BatchInformationEvent ) ev.getEventType() : null; + + if ( ev == null ) + return null; + + // prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b, cases of missing batch information were incorrectly typed + // see https://github.com/PavlidisLab/Gemma/issues/1155 for details + if ( ev.getEventType() instanceof FailedBatchInformationFetchingEvent + && ev.getNote() != null && ev.getNote().contains( "No header file for" ) ) { + return new BatchInformationMissingEvent(); + } + + return ( BatchInformationEvent ) ev.getEventType(); } private boolean hasBatchFactor( ExpressionExperiment ee ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java index d59b6bed74..5cbb74d47a 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/GeeqServiceImpl.java @@ -523,7 +523,7 @@ private void scoreBatchEffect( ExpressionExperiment ee, Geeq gq, boolean infoDet } else { BatchEffectDetails be = expressionExperimentBatchInformationService.getBatchEffectDetails( ee ); hasInfo = be.hasBatchInformation(); - corrected = be.getDataWasBatchCorrected(); + corrected = be.dataWasBatchCorrected(); BatchEffectDetails.BatchEffectStatistics statistics = be.getBatchEffectStatistics(); if ( statistics != null ) { hasStrong = statistics.getPvalue() < 0.0001; diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java index 604d8ea077..41b1a729fd 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java +++ 
b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentBatchInformationServiceTest.java @@ -1,5 +1,6 @@ package ubic.gemma.persistence.service.expression.experiment; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -7,23 +8,25 @@ import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; +import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails; import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService; import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationServiceImpl; import ubic.gemma.core.analysis.preprocess.svd.SVDService; +import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; import ubic.gemma.core.context.TestComponent; import ubic.gemma.model.common.auditAndSecurity.AuditAction; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.*; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.expression.experiment.*; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; +import java.util.Collections; import java.util.Date; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; +import static org.mockito.Mockito.*; @ContextConfiguration public class ExpressionExperimentBatchInformationServiceTest extends AbstractJUnit4SpringContextTests { @@ -63,13 +66,49 @@ public AuditEventService auditEventService() 
{ @Autowired private AuditEventService auditEventService; + @Autowired + private SVDService svdService; + @Before public void setUp() { when( expressionExperimentService.thawLiter( any() ) ).thenAnswer( a -> a.getArgument( 0 ) ); } + @After + public void resetMocks() { + reset( expressionExperimentService, svdService ); + } + + @Test + public void test() { + SVDValueObject svdResult = mock(); + when( svdResult.getDatePvals() ).thenReturn( Collections.singletonMap( 0, 0.0000001 ) ); + when( svdResult.getVariances() ).thenReturn( new double[] { 0.99 } ); + when( svdService.getSvdFactorAnalysis( 1L ) ).thenReturn( svdResult ); + ExperimentalFactor batchFactor = new ExperimentalFactor(); + batchFactor.setName( ExperimentalDesignUtils.BATCH_FACTOR_NAME ); + Characteristic c = Characteristic.Factory.newInstance(); + c.setCategory( ExperimentalDesignUtils.BATCH_FACTOR_CATEGORY_NAME ); + batchFactor.setCategory( c ); + ExpressionExperiment ee = new ExpressionExperiment(); + ee.setId( 1L ); + ee.setExperimentalDesign( new ExperimentalDesign() ); + ee.getExperimentalDesign().getExperimentalFactors().add( batchFactor ); + assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); + assertTrue( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).dataWasBatchCorrected() ); + BatchEffectDetails.BatchEffectStatistics stats = eeBatchService.getBatchEffectDetails( ee ).getBatchEffectStatistics(); + assertNotNull( stats ); + assertEquals( 1, stats.getComponent() ); + assertEquals( 0.0000001, stats.getPvalue(), 0 ); + assertEquals( 0.99, stats.getComponentVarianceProportion(), 0 ); + } + + /** + * Cover various edge cases of missing batch information. 
+ */ @Test - public void testBatchInfo() { + public void testMissingBatchInformation() { AuditEventType aet; AuditEvent ae; ExpressionExperiment ee; @@ -78,6 +117,9 @@ public void testBatchInfo() { ee = new ExpressionExperiment(); assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).hasBatchInformation() ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).hasUninformativeBatchInformation() ); + assertEquals( BatchEffectType.NO_BATCH_INFO, eeBatchService.getBatchEffect( ee ) ); ee = new ExpressionExperiment(); aet = new BatchInformationFetchingEvent(); @@ -85,6 +127,8 @@ public void testBatchInfo() { when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); assertTrue( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertTrue( eeBatchService.getBatchEffectDetails( ee ).hasBatchInformation() ); + assertEquals( BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE, eeBatchService.getBatchEffect( ee ) ); ee = new ExpressionExperiment(); aet = new SingleBatchDeterminationEvent(); @@ -92,6 +136,8 @@ public void testBatchInfo() { when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); assertTrue( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertTrue( eeBatchService.getBatchEffectDetails( ee ).hasBatchInformation() ); + assertEquals( BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE, eeBatchService.getBatchEffect( ee ) ); ee = new ExpressionExperiment(); aet = new BatchInformationMissingEvent(); @@ -99,6 +145,9 @@ public void testBatchInfo() { when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertFalse( 
eeBatchService.getBatchEffectDetails( ee ).hasBatchInformation() ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).hasProblematicBatchInformation() ); + assertEquals( BatchEffectType.NO_BATCH_INFO, eeBatchService.getBatchEffect( ee ) ); // batch info missing (after 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) ee = new ExpressionExperiment(); @@ -107,6 +156,9 @@ public void testBatchInfo() { when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).hasBatchInformation() ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).hasProblematicBatchInformation() ); + assertEquals( BatchEffectType.NO_BATCH_INFO, eeBatchService.getBatchEffect( ee ) ); // batch info failed (prior to 23f7dcdbcbbf7b137c74abf2b6df96134bddc88b) ee = new ExpressionExperiment(); @@ -115,6 +167,9 @@ public void testBatchInfo() { when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertFalse( eeBatchService.checkHasBatchInfo( ee ) ); assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).hasBatchInformation() ); + assertFalse( eeBatchService.getBatchEffectDetails( ee ).hasProblematicBatchInformation() ); + assertEquals( BatchEffectType.NO_BATCH_INFO, eeBatchService.getBatchEffect( ee ) ); // has batch information, but it's got some issues ee = new ExpressionExperiment(); @@ -123,5 +178,8 @@ public void testBatchInfo() { when( auditEventService.getLastEvent( ee, BatchInformationEvent.class ) ).thenReturn( ae ); assertTrue( eeBatchService.checkHasBatchInfo( ee ) ); assertFalse( eeBatchService.checkHasUsableBatchInfo( ee ) ); + assertTrue( eeBatchService.getBatchEffectDetails( ee ).hasBatchInformation() ); + assertTrue( eeBatchService.getBatchEffectDetails( ee 
).hasProblematicBatchInformation() ); + assertEquals( BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE, eeBatchService.getBatchEffect( ee ) ); } } \ No newline at end of file From 5ff9847caf075962e3eb9f638ff6a5bba6343447 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 21 Jun 2024 11:26:52 -0700 Subject: [PATCH 54/81] Add missing error schemas for getAnnotationsParents() and getAnnotationsChildren() Improve the test to better indicate which endpoints are problematic. --- .../gemma/rest/AnnotationsWebService.java | 9 ++++---- .../java/ubic/gemma/rest/OpenApiTest.java | 22 ++++++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java index 5bd95a3789..a2dcb12bbe 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java @@ -122,8 +122,8 @@ public AnnotationsWebService( OntologyService ontologyService, SearchService sea description = "Terms that are returned satisfies the [rdfs:subClassOf](https://www.w3.org/TR/2012/REC-owl2-syntax-20121211/#Subclass_Axioms) or [part_of](http://purl.obolibrary.org/obo/BFO_0000050) relations. 
When `direct` is set to false, this rule is applied recursively.", responses = { @ApiResponse(useReturnTypeSchema = true, content = @Content()), - @ApiResponse(responseCode = "404", description = "No term matched the given URI."), - @ApiResponse(responseCode = "503", description = "Ontology inference timed out.") }) + @ApiResponse(responseCode = "404", description = "No term matched the given URI.", content = @Content(schema = @Schema(implementation = ResponseErrorObject.class))), + @ApiResponse(responseCode = "503", description = "Ontology inference timed out.", content = @Content(schema = @Schema(implementation = ResponseErrorObject.class))) }) public List getAnnotationsParents( @Parameter(description = "Term URI") @QueryParam("uri") String termUri, @Parameter(description = "Only include direct children.") @QueryParam("direct") @DefaultValue("false") boolean direct ) { @@ -142,8 +142,9 @@ public List getAnnotationsParents( description = "Terms that are returned satisfies the [inverse of rdfs:subClassOf](https://www.w3.org/TR/2012/REC-owl2-syntax-20121211/#Subclass_Axioms) or [has_part](http://purl.obolibrary.org/obo/BFO_0000051) relations. 
When `direct` is set to false, this rule is applied recursively.", responses = { @ApiResponse(useReturnTypeSchema = true, content = @Content()), - @ApiResponse(responseCode = "404", description = "No term matched the given URI."), - @ApiResponse(responseCode = "503", description = "Ontology inference timed out.") }) + @ApiResponse(responseCode = "404", description = "No term matched the given URI.", content = @Content(schema = @Schema(implementation = ResponseErrorObject.class))), + @ApiResponse(responseCode = "503", description = "Ontology inference timed out.", content = @Content(schema = @Schema(implementation = ResponseErrorObject.class))) + }) public List getAnnotationsChildren( @Parameter(description = "Term URI") @QueryParam("uri") String termUri, @Parameter(description = "Only include direct parents.") @QueryParam("direct") @DefaultValue("false") boolean direct ) { diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java index 5b4a7a44ee..3c71964134 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java @@ -4,10 +4,13 @@ import io.swagger.v3.core.util.Json; import io.swagger.v3.core.util.Yaml; import io.swagger.v3.oas.models.OpenAPI; +import io.swagger.v3.oas.models.PathItem; import io.swagger.v3.oas.models.media.Schema; +import io.swagger.v3.oas.models.responses.ApiResponse; import lombok.Data; import org.assertj.core.api.Assertions; import org.assertj.core.api.Condition; +import org.assertj.core.api.SoftAssertions; import org.junit.Before; import org.junit.Test; import org.springframework.beans.factory.FactoryBean; @@ -17,6 +20,7 @@ import org.springframework.core.io.ClassPathResource; import org.springframework.security.access.AccessDecisionManager; import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.search.SearchService; 
import ubic.gemma.core.util.BuildInfo; import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; @@ -24,7 +28,6 @@ import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.core.context.TestComponent; import ubic.gemma.rest.analytics.AnalyticsProvider; import ubic.gemma.rest.swagger.resolver.CustomModelResolver; import ubic.gemma.rest.util.BaseJerseyTest; @@ -35,6 +38,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.Collections; +import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.mock; @@ -165,17 +169,23 @@ public void testEnsureThatAllEndpointHaveADefaultGetResponseOrIsARedirection() { @Test public void testEnsureThatAllErrorResponsesUseResponseErrorObjectWithJsonMediaType() { - assertThat( spec.getPaths() ).allSatisfy( ( path, operations ) -> { - assertThat( operations.getGet().getResponses() ).allSatisfy( ( code, response ) -> { + SoftAssertions assertions = new SoftAssertions(); + for ( Map.Entry entry : spec.getPaths().entrySet() ) { + String path = entry.getKey(); + PathItem operations = entry.getValue(); + for ( Map.Entry e : operations.getGet().getResponses().entrySet() ) { + String code = e.getKey(); + ApiResponse response = e.getValue(); if ( code.startsWith( "4" ) || code.startsWith( "5" ) ) { - assertThat( response.getContent() ) + assertions.assertThat( response.getContent() ) .describedAs( "GET %s -> %s", path, code ) .hasEntrySatisfying( "application/json", content -> { assertThat( content.getSchema().get$ref() ).isEqualTo( "#/components/schemas/ResponseErrorObject" ); } ); } - } ); - } ); + } + } + assertions.assertAll(); } @Test From 0d42f5227c79e03c7f8c94cb006c86d1556d81ed Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 21 Jun 2024 11:51:33 -0700 Subject: [PATCH 55/81] Add a check when 
deleting an experiment with co-expression links --- .../coexpression/CoexpressionDao.java | 2 + .../coexpression/CoexpressionDaoImpl.java | 9 ++++ .../coexpression/CoexpressionService.java | 6 ++- .../coexpression/CoexpressionServiceImpl.java | 5 ++ .../ExpressionExperimentServiceImpl.java | 8 +++ .../core/util/test/BaseDatabaseTest.java | 1 + .../coexpression/CoexpressionDaoTest.java | 51 +++++++++++++++++++ .../ExpressionExperimentServiceImplTest.java | 9 +++- .../ExpressionExperimentServiceTest.java | 25 ++++++++- 9 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 gemma-core/src/test/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoTest.java diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDao.java index 845fe36c3c..351bc711aa 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDao.java @@ -36,6 +36,8 @@ */ public interface CoexpressionDao { + boolean hasLinks( Taxon taxon, BioAssaySet ee ); + Integer countLinks( Gene gene, BioAssaySet ee ); /** diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoImpl.java index 08b5579d61..25a4f03e07 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoImpl.java @@ -107,6 +107,15 @@ public class CoexpressionDaoImpl implements CoexpressionDao { @Autowired private SessionFactory sessionFactory; + @Override + public boolean hasLinks( Taxon 
taxon, BioAssaySet ee ) { + return ( Boolean ) sessionFactory.getCurrentSession() + .createQuery( "select count(*) > 0 from " + CoexpressionQueryUtils.getExperimentLinkClassName( taxon ) + " e " + + "where e.experiment = :ee" ) + .setParameter( "ee", ee ) + .uniqueResult(); + } + @Override public Integer countLinks( Gene gene, BioAssaySet ee ) { // Looking at the first gene is enough if we save the flipped versions; we don't get a double-count here because diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionService.java index d1ba93a00b..a13f519a05 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionService.java @@ -43,6 +43,11 @@ */ public interface CoexpressionService { + /** + * Check if a given dataset has coexpression links. 
+ */ + boolean hasLinks( BioAssaySet ee ); + /** * @param gene gene * @param ee bio assay set @@ -173,5 +178,4 @@ Map initializeLinksFromOldData( Gene gene @Secured("GROUP_ADMIN") Map countOldLinks( Collection genes ); - } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionServiceImpl.java index 8ae52ec082..67150dba66 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionServiceImpl.java @@ -64,6 +64,11 @@ public class CoexpressionServiceImpl implements CoexpressionService { @Autowired private GeneDao geneDao; + @Override + public boolean hasLinks( BioAssaySet ee ) { + return coexpressionDao.hasLinks( this.experimentDao.getTaxon( ee ), ee ); + } + @Override @Transactional(readOnly = true) public Integer countLinks( BioAssaySet ee, Gene gene ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index c1ce5fc051..88a7b0f8d7 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -67,6 +67,7 @@ import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService; +import 
ubic.gemma.persistence.service.association.coexpression.CoexpressionService; import ubic.gemma.persistence.service.blacklist.BlacklistedEntityService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; @@ -131,6 +132,8 @@ public class ExpressionExperimentServiceImpl private SampleCoexpressionAnalysisService sampleCoexpressionAnalysisService; @Autowired private BlacklistedEntityService blacklistedEntityService; + @Autowired + private CoexpressionService coexpressionService; @Autowired public ExpressionExperimentServiceImpl( ExpressionExperimentDao expressionExperimentDao ) { @@ -1321,6 +1324,11 @@ public void remove( ExpressionExperiment ee ) { + " You do not have permission to edit this experiment." ); } + // check if a dataset has coexpression links + if ( this.coexpressionService.hasLinks( ee ) ) { + throw new IllegalStateException( ee + " has coexpression links, those must be removed first with 'gemma-cli coexpAnalyze -delete'." 
); + } + // Remove subsets Collection subsets = this.getSubSets( ee ); for ( ExpressionExperimentSubSet subset : subsets ) { diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java index 8ab525d3e9..7a166568d7 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java @@ -28,6 +28,7 @@ import org.springframework.security.acls.domain.SpringCacheBasedAclCache; import org.springframework.security.core.GrantedAuthority; import org.springframework.security.core.authority.SimpleGrantedAuthority; +import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractTransactionalJUnit4SpringContextTests; import org.springframework.test.jdbc.JdbcTestUtils; import org.springframework.transaction.PlatformTransactionManager; diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoTest.java new file mode 100644 index 0000000000..076a3eb398 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/association/coexpression/CoexpressionDaoTest.java @@ -0,0 +1,51 @@ +package ubic.gemma.persistence.service.association.coexpression; + +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.context.TestComponent; +import ubic.gemma.core.util.test.BaseDatabaseTest; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.genome.Taxon; + +import static org.junit.Assert.assertFalse; +import 
static org.mockito.Mockito.mock; + +@ContextConfiguration +public class CoexpressionDaoTest extends BaseDatabaseTest { + + @Configuration + @TestComponent + static class CoexpressionDaoTestContextConfiguration extends BaseDatabaseTestContextConfiguration { + + @Bean + public CoexpressionDao coexpressionDao() { + return new CoexpressionDaoImpl(); + } + + @Bean + public CoexpressionCache gene2GeneCoexpressionCache() { + return mock(); + } + + @Bean + public GeneTestedInCache geneTestedInCache() { + return mock(); + } + } + + @Autowired + private CoexpressionDao coexpressionDao; + + @Test + public void testHasLinks() { + Taxon taxon = new Taxon(); + ExpressionExperiment ee = new ExpressionExperiment(); + ee.setTaxon( taxon ); + sessionFactory.getCurrentSession().persist( taxon ); + sessionFactory.getCurrentSession().persist( ee ); + assertFalse( coexpressionDao.hasLinks( taxon, ee ) ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImplTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImplTest.java index 868603e51b..65a994c505 100755 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImplTest.java @@ -29,6 +29,7 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; import ubic.gemma.core.analysis.preprocess.svd.SVDService; +import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchService; import ubic.gemma.model.common.auditAndSecurity.User; @@ -43,13 +44,12 @@ import 
ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService; +import ubic.gemma.persistence.service.association.coexpression.CoexpressionService; import ubic.gemma.persistence.service.blacklist.BlacklistedEntityService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; import ubic.gemma.persistence.service.expression.bioAssayData.BioAssayDimensionService; import ubic.gemma.persistence.service.expression.biomaterial.BioMaterialService; -import ubic.gemma.persistence.service.expression.experiment.*; -import ubic.gemma.core.context.TestComponent; import java.util.Collection; import java.util.Collections; @@ -171,6 +171,11 @@ public BlacklistedEntityService blacklistedEntityService() { public AccessDecisionManager accessDecisionManager() { return mock( AccessDecisionManager.class ); } + + @Bean + public CoexpressionService coexpressionService() { + return mock(); + } } @Autowired diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java index 9c605b495f..4d8af9ab79 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceTest.java @@ -14,10 +14,12 @@ import ubic.gemma.core.context.TestComponent; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchService; +import 
ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService; +import ubic.gemma.persistence.service.association.coexpression.CoexpressionService; import ubic.gemma.persistence.service.blacklist.BlacklistedEntityService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; @@ -31,6 +33,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.*; /** @@ -147,6 +150,11 @@ public BlacklistedEntityService blacklistedEntityService() { public AccessDecisionManager accessDecisionManager() { return mock( AccessDecisionManager.class ); } + + @Bean + public CoexpressionService coexpressionService() { + return mock(); + } } @Autowired @@ -158,9 +166,15 @@ public AccessDecisionManager accessDecisionManager() { @Autowired private OntologyService ontologyService; + @Autowired + private CoexpressionService coexpressionService; + + @Autowired + private SecurityService securityService; + @After public void tearDown() { - reset( ontologyService ); + reset( ontologyService, coexpressionService, securityService ); } @Test @@ -197,4 +211,13 @@ public void testGetAnnotationsUsageFrequencyWithFilters() throws TimeoutExceptio verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } + + @Test + public void 
testRemoveDatasetWithCoexpressionLinks() { + ExpressionExperiment ee = new ExpressionExperiment(); + when( coexpressionService.hasLinks( ee ) ).thenReturn( true ); + when( securityService.isEditable( ee ) ).thenReturn( true ); + assertThatThrownBy( () -> expressionExperimentService.remove( ee ) ) + .isInstanceOf( IllegalStateException.class ); + } } From 68e5eb9137fd875a11941097175fbfbd9c9b7a11 Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Mon, 10 Jun 2024 13:53:09 -0700 Subject: [PATCH 56/81] handle library source from GEO "transcriptomic single cell" +test for now we just reject these samples, behaviour we'll want to change later --- .../expression/geo/GeoConverterImpl.java | 451 ++++++------------ .../expression/geo/GeoFamilyParser.java | 18 +- .../expression/geo/GeoConverterTest.java | 10 +- .../expression/geo/GSE235534_family.soft.gz | Bin 0 -> 7346 bytes 4 files changed, 153 insertions(+), 326 deletions(-) create mode 100644 gemma-core/src/test/resources/data/loader/expression/geo/GSE235534_family.soft.gz diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoConverterImpl.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoConverterImpl.java index 0c310671fe..b5b4b1dfb3 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoConverterImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoConverterImpl.java @@ -123,8 +123,7 @@ public class GeoConverterImpl implements GeoConverter { /** * More than this and we apply stricter selection criteria for choosing elements to keep on a platform. 
*/ - private int tooManyElements = Settings - .getInt( "geo.platform.import.maxelements", GeoConverterImpl.DEFAULT_DEFINITION_OF_TOO_MANY_ELEMENTS ); + private int tooManyElements = Settings.getInt( "geo.platform.import.maxelements", GeoConverterImpl.DEFAULT_DEFINITION_OF_TOO_MANY_ELEMENTS ); @Autowired private ExternalDatabaseService externalDatabaseService; @Autowired @@ -196,16 +195,13 @@ public Object convert( GeoData geoObject, boolean skipDataVectors ) { } else if ( geoObject instanceof GeoSeries ) { // typically we start here, with a series. return this.convertSeries( ( GeoSeries ) geoObject ); } else if ( geoObject instanceof GeoSubset ) { - throw new IllegalArgumentException( - "Can't deal with " + geoObject.getClass().getName() + " ('" + geoObject + "')" ); + throw new IllegalArgumentException( "Can't deal with " + geoObject.getClass().getName() + " ('" + geoObject + "')" ); } else if ( geoObject instanceof GeoSample ) { - throw new IllegalArgumentException( - "Can't deal with " + geoObject.getClass().getName() + " ('" + geoObject + "')" ); + throw new IllegalArgumentException( "Can't deal with " + geoObject.getClass().getName() + " ('" + geoObject + "')" ); } else if ( geoObject instanceof GeoPlatform ) { return this.convertPlatform( ( GeoPlatform ) geoObject ); } else { - throw new IllegalArgumentException( - "Can't deal with " + geoObject.getClass().getName() + " ('" + geoObject + "')" ); + throw new IllegalArgumentException( "Can't deal with " + geoObject.getClass().getName() + " ('" + geoObject + "')" ); } } @@ -234,8 +230,7 @@ public void convertSubsetToExperimentalFactor( ExpressionExperiment expExp, GeoS duplicateExists = true; experimentalFactor = existingExperimentalFactor; if ( GeoConverterImpl.log.isDebugEnabled() ) - GeoConverterImpl.log.debug( experimentalFactor.getName() - + " already exists. Not adding to list of experimental factors." ); + GeoConverterImpl.log.debug( experimentalFactor.getName() + " already exists. 
Not adding to list of experimental factors." ); break; } } @@ -264,8 +259,7 @@ public void convertSubsetToExperimentalFactor( ExpressionExperiment expExp, GeoS * @return Primary taxon of array as determined by this method */ @Override - public Taxon getPrimaryArrayTaxon( Collection platformTaxa, Collection probeTaxa ) - throws IllegalArgumentException { + public Taxon getPrimaryArrayTaxon( Collection platformTaxa, Collection probeTaxa ) throws IllegalArgumentException { if ( platformTaxa == null || platformTaxa.isEmpty() ) { return null; @@ -304,8 +298,7 @@ public Taxon getPrimaryArrayTaxon( Collection platformTaxa, Collection highestScore ) { + if ( !taxon.equals( "n/a" ) && StringUtils.isNotBlank( taxon ) && taxonProbeNumberList.get( taxon ) > highestScore ) { primaryTaxonName = taxon; highestScore = taxonProbeNumberList.get( taxon ); } @@ -337,8 +330,7 @@ public void setSplitByPlatform( boolean splitByPlatform ) { @Override public byte[] convertData( List vector, QuantitationType qt ) { - if ( vector == null || vector.size() == 0 ) - return null; + if ( vector == null || vector.size() == 0 ) return null; boolean containsAtLeastOneNonNull = false; for ( Object string : vector ) { @@ -376,9 +368,7 @@ public byte[] convertData( List vector, QuantitationType qt ) { toConvert.add( rawValue ); } else if ( pt.equals( PrimitiveType.CHAR ) ) { if ( valueString.length() != 1 ) { - throw new IllegalStateException( - "Attempt to cast a string of length " + valueString.length() + " to a char: " - + rawValue + "(quantitation type =" + qt ); + throw new IllegalStateException( "Attempt to cast a string of length " + valueString.length() + " to a char: " + rawValue + "(quantitation type =" + qt ); } toConvert.add( valueString.toCharArray()[0] ); } else if ( pt.equals( PrimitiveType.INT ) ) { @@ -409,20 +399,17 @@ public byte[] convertData( List vector, QuantitationType qt ) { if ( pt.equals( PrimitiveType.DOUBLE ) ) { double[] byteArrayToDoubles = 
byteArrayConverter.byteArrayToDoubles( bytes ); if ( byteArrayToDoubles.length != vector.size() ) { - throw new IllegalStateException( - "Expected " + vector.size() + " got " + byteArrayToDoubles.length + " doubles" ); + throw new IllegalStateException( "Expected " + vector.size() + " got " + byteArrayToDoubles.length + " doubles" ); } } else if ( pt.equals( PrimitiveType.INT ) ) { int[] byteArrayToInts = byteArrayConverter.byteArrayToInts( bytes ); if ( byteArrayToInts.length != vector.size() ) { - throw new IllegalStateException( - "Expected " + vector.size() + " got " + byteArrayToInts.length + " ints" ); + throw new IllegalStateException( "Expected " + vector.size() + " got " + byteArrayToInts.length + " ints" ); } } else if ( pt.equals( PrimitiveType.BOOLEAN ) ) { boolean[] byteArrayToBooleans = byteArrayConverter.byteArrayToBooleans( bytes ); if ( byteArrayToBooleans.length != vector.size() ) { - throw new IllegalStateException( - "Expected " + vector.size() + " got " + byteArrayToBooleans.length + " booleans" ); + throw new IllegalStateException( "Expected " + vector.size() + " got " + byteArrayToBooleans.length + " booleans" ); } } @@ -487,8 +474,7 @@ protected String makeTitle( String title, String appendix ) { return StringUtils.abbreviate( title, maxWidth ); } - private void addFactorValueToBioMaterial( ExpressionExperiment expExp, GeoSubset geoSubSet, - FactorValue factorValue ) { + private void addFactorValueToBioMaterial( ExpressionExperiment expExp, GeoSubset geoSubSet, FactorValue factorValue ) { // fill in biomaterial-->factorvalue. 
for ( GeoSample sample : geoSubSet.getSamples() ) { @@ -497,9 +483,7 @@ private void addFactorValueToBioMaterial( ExpressionExperiment expExp, GeoSubset if ( bioAssay.getAccession().getAccession().equals( sample.getGeoAccession() ) ) { BioMaterial material = bioAssay.getSampleUsed(); if ( GeoConverterImpl.log.isDebugEnabled() ) { - GeoConverterImpl.log - .debug( "Adding " + factorValue.getExperimentalFactor() + " : " + factorValue + " to " - + material ); + GeoConverterImpl.log.debug( "Adding " + factorValue.getExperimentalFactor() + " : " + factorValue + " to " + material ); } material.getFactorValues().add( factorValue ); } @@ -533,30 +517,20 @@ private boolean alreadyHasFactorValueForFactor( BioMaterial bioMaterial, Experim * @param samplesToSkip samples to skip * @param series series */ - private void checkForDataToSkip( GeoSeries series, Collection dataSetsToSkip, - Collection samplesToSkip ) { + private void checkForDataToSkip( GeoSeries series, Collection dataSetsToSkip, Collection samplesToSkip ) { for ( GeoDataset dataset : series.getDataSets() ) { // This doesn't cover every possibility... 
- if ( dataset.getExperimentType().equals( ExperimentType.arrayCGH ) || dataset.getExperimentType() - .equals( ExperimentType.ChIPChip ) - || dataset.getExperimentType() - .equals( ExperimentType.geneExpressionSAGEbased ) - || dataset.getExperimentType() - .equals( ExperimentType.Other ) ) { - GeoConverterImpl.log - .warn( "Gemma does not know how to handle experiment type=" + dataset.getExperimentType() ); + if ( dataset.getExperimentType().equals( ExperimentType.arrayCGH ) || dataset.getExperimentType().equals( ExperimentType.ChIPChip ) || dataset.getExperimentType().equals( ExperimentType.geneExpressionSAGEbased ) || dataset.getExperimentType().equals( ExperimentType.Other ) ) { + GeoConverterImpl.log.warn( "Gemma does not know how to handle experiment type=" + dataset.getExperimentType() ); if ( series.getDataSets().size() == 1 ) { - GeoConverterImpl.log.warn( "Because the experiment type cannot be handled, " - + "and there is only one data set in this series, nothing will be returned!" ); + GeoConverterImpl.log.warn( "Because the experiment type cannot be handled, " + "and there is only one data set in this series, nothing will be returned!" ); } samplesToSkip.addAll( this.getDatasetSamples( dataset ) ); dataSetsToSkip.add( dataset.getGeoAccession() ); } else { - GeoConverterImpl.log - .info( "Data from " + dataset + " is of type " + dataset.getExperimentType() + ", " + this - .getDatasetSamples( dataset ).size() + " samples." ); + GeoConverterImpl.log.info( "Data from " + dataset + " is of type " + dataset.getExperimentType() + ", " + this.getDatasetSamples( dataset ).size() + " samples." 
); } } @@ -566,11 +540,13 @@ private void checkForDataToSkip( GeoSeries series, Collection dataSetsTo continue; } else if ( sample.getType().equals( "SRA" ) || sample.getType().equals( "MPSS" ) ) { - if ( sample.getLibSource() != null && sample.getLibSource().equals( "transcriptomic" ) ) { + if ( sample.getLibSource() != null && sample.getLibSource().equals( "transcriptomic single cell" ) ) { + // FIXME e.g GSE213756 sample GSM6593523. Currently, we will reject these but will add support + // no-op, just making explicit. + } else if ( sample.getLibSource() != null && sample.getLibSource().equals( "transcriptomic" ) ) { // have to drill down. - if ( sample.getLibStrategy().equals( "RNA-Seq" ) || sample.getLibStrategy() - .equals( "ssRNA-seq" ) || sample.getLibStrategy().equalsIgnoreCase( "Other" ) ) { + if ( sample.getLibStrategy().equals( "RNA-Seq" ) || sample.getLibStrategy().equals( "ssRNA-seq" ) || sample.getLibStrategy().equalsIgnoreCase( "Other" ) ) { // I've added "other" to be allowed just to avoid being too strict, but removed miRNA and ncRNA. continue; } @@ -579,9 +555,7 @@ private void checkForDataToSkip( GeoSeries series, Collection dataSetsTo // some MPSS might not have libSource filled in. 
Other possibilities we know about for type are 'other', 'SAGE' and 'mixed'; - GeoConverterImpl.log - .info( "Skipping ineligible sample: " + sample.getGeoAccession() + ": Type=" + sample.getType() - + " LibSource=" + sample.getLibSource() + " LibStrategy=" + sample.getLibStrategy() ); + GeoConverterImpl.log.info( "Skipping ineligible sample: " + sample.getGeoAccession() + ": Type=" + sample.getType() + " LibSource=" + sample.getLibSource() + " LibStrategy=" + sample.getLibStrategy() ); samplesToSkip.add( sample ); } @@ -597,8 +571,7 @@ private void checkForDataToSkip( GeoSeries series, Collection dataSetsTo * @param platform platform * @param platformDatasetMap dataset map */ - private void convertByPlatform( GeoSeries series, Collection converted, - Map> platformDatasetMap, int i, GeoPlatform platform ) { + private void convertByPlatform( GeoSeries series, Collection converted, Map> platformDatasetMap, int i, GeoPlatform platform ) { GeoSeries platformSpecific = new GeoSeries(); Collection datasets = platformDatasetMap.get( platform ); @@ -637,8 +610,7 @@ private void convertByPlatform( GeoSeries series, Collection dataVector, QuantitationType qt ) { + private RawExpressionDataVector convertDesignElementDataVector( GeoPlatform geoPlatform, ExpressionExperiment expExp, BioAssayDimension bioAssayDimension, String designElementName, List dataVector, QuantitationType qt ) { - if ( dataVector == null || dataVector.size() == 0 ) - return null; + if ( dataVector == null || dataVector.size() == 0 ) return null; int numValuesExpected = bioAssayDimension.getBioAssays().size(); if ( dataVector.size() != numValuesExpected ) { - throw new IllegalArgumentException( - "Expected " + numValuesExpected + " in bioassaydimension, data contains " + dataVector.size() ); + throw new IllegalArgumentException( "Expected " + numValuesExpected + " in bioassaydimension, data contains " + dataVector.size() ); } byte[] blob = this.convertData( dataVector, qt ); if ( blob == null ) { // all 
missing etc. @@ -1104,10 +1058,7 @@ private RawExpressionDataVector convertDesignElementDataVector( GeoPlatform geoP return null; } - if ( compositeSequence.getBiologicalCharacteristic() != null - && compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry() != null && - compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry().getExternalDatabase() - .getName() == null ) { + if ( compositeSequence.getBiologicalCharacteristic() != null && compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry() != null && compositeSequence.getBiologicalCharacteristic().getSequenceDatabaseEntry().getExternalDatabase().getName() == null ) { // this is obscure. throw new IllegalStateException( compositeSequence + " sequence accession external database lacks name" ); } @@ -1196,9 +1147,7 @@ private ArrayDesign convertPlatform( GeoPlatform platform ) { // convert the design element information. String identifier = platform.getIdColumnName(); if ( identifier == null && !platform.getColumnNames().isEmpty() ) { - throw new IllegalStateException( - "Cannot determine the platform design element id column for " + platform + "; " + platform - .getColumnNames().size() + " column names available." ); + throw new IllegalStateException( "Cannot determine the platform design element id column for " + platform + "; " + platform.getColumnNames().size() + " column names available." 
); } Collection externalReferences = this.determinePlatformExternalReferenceIdentifier( platform ); @@ -1254,9 +1203,7 @@ private ArrayDesign convertPlatform( GeoPlatform platform ) { return arrayDesign; } - boolean fullyUsable = this - .convertPlatformElements( identifier, platform, arrayDesign, externalReferences, probeOrganismColumn, - externalDb, descriptions, sequences, probeOrganism, primaryTaxon ); + boolean fullyUsable = this.convertPlatformElements( identifier, platform, arrayDesign, externalReferences, probeOrganismColumn, externalDb, descriptions, sequences, probeOrganism, primaryTaxon ); if ( !fullyUsable ) { GeoConverterImpl.log.warn( "Some or all identifiers may have been skipped during parse" ); @@ -1282,10 +1229,7 @@ private ArrayDesign convertPlatform( GeoPlatform platform ) { * the data doesn't * match) */ - private boolean convertPlatformElements( String identifier, GeoPlatform platform, ArrayDesign arrayDesign, - Collection externalReferences, String probeOrganismColumn, ExternalDatabase externalDb, - List descriptions, List sequences, List probeOrganism, Taxon - primaryTaxon ) { + private boolean convertPlatformElements( String identifier, GeoPlatform platform, ArrayDesign arrayDesign, Collection externalReferences, String probeOrganismColumn, ExternalDatabase externalDb, List descriptions, List sequences, List probeOrganism, Taxon primaryTaxon ) { /* * This is a very commonly found column name in files, it seems standard in GEO. If we don't find it, it's okay. 
@@ -1300,8 +1244,7 @@ private boolean convertPlatformElements( String identifier, GeoPlatform platform } if ( !platform.useDataFromGeo() && !forceConvertElements ) { - GeoConverterImpl.log - .warn( "Will not convert elements for this platform - set forceConvertElements to override" ); + GeoConverterImpl.log.warn( "Will not convert elements for this platform - set forceConvertElements to override" ); return false; } @@ -1312,15 +1255,10 @@ private boolean convertPlatformElements( String identifier, GeoPlatform platform externalRefs = platform.getColumnData( externalReferences ); } - assert externalRefs == null - || externalRefs.iterator().next().size() == identifiers.size() : "Unequal numbers of identifiers and external references! " - + externalRefs.iterator().next().size() - + " != " + identifiers.size(); + assert externalRefs == null || externalRefs.iterator().next().size() == identifiers.size() : "Unequal numbers of identifiers and external references! " + externalRefs.iterator().next().size() + " != " + identifiers.size(); if ( GeoConverterImpl.log.isDebugEnabled() ) { - GeoConverterImpl.log - .debug( "Converting " + identifiers.size() + " probe identifiers on GEO platform " + platform - .getGeoAccession() ); + GeoConverterImpl.log.debug( "Converting " + identifiers.size() + " probe identifiers on GEO platform " + platform.getGeoAccession() ); } Iterator descIter = null; @@ -1337,9 +1275,7 @@ private boolean convertPlatformElements( String identifier, GeoPlatform platform if ( identifiers.size() > tooManyElements ) { // something odd like an exon array in GEO, there are lots of unused probes (data sets don't use them) - GeoConverterImpl.log - .warn( "Platform " + platform.getGeoAccession() + " has more elements than expected (" + identifiers - .size() + "), turning on strict selection method" ); + GeoConverterImpl.log.warn( "Platform " + platform.getGeoAccession() + " has more elements than expected (" + identifiers.size() + "), turning on strict selection 
method" ); strictSelection = true; } @@ -1347,36 +1283,25 @@ private boolean convertPlatformElements( String identifier, GeoPlatform platform Collection compositeSequences = new ArrayList<>( 5000 ); int i = 0; // to get sequences, if we have them, and clone identifiers. for ( String id : identifiers ) { - i = this.processId( platform, arrayDesign, probeOrganismColumn, externalDb, sequences, probeOrganism, - primaryTaxon, cloneIdentifiers, externalRefs, descIter, refSeqAccessionPattern, strictSelection, - skipped, compositeSequences, i, id ); + i = this.processId( platform, arrayDesign, probeOrganismColumn, externalDb, sequences, probeOrganism, primaryTaxon, cloneIdentifiers, externalRefs, descIter, refSeqAccessionPattern, strictSelection, skipped, compositeSequences, i, id ); } arrayDesign.setCompositeSequences( new HashSet<>( compositeSequences ) ); arrayDesign.setAdvertisedNumberOfDesignElements( compositeSequences.size() ); if ( !skipped.isEmpty() ) { - GeoConverterImpl.log - .info( "Skipped " + skipped.size() + " elements due to strict selection; last was " + skipped - .get( skipped.size() - 1 ) + "; retained: " + compositeSequences.size() ); + GeoConverterImpl.log.info( "Skipped " + skipped.size() + " elements due to strict selection; last was " + skipped.get( skipped.size() - 1 ) + "; retained: " + compositeSequences.size() ); } if ( arrayDesign.getCompositeSequences().size() > tooManyElements ) { // this is just a safeguard; perhaps temporary. - throw new IllegalStateException( - "Platform " + arrayDesign.getShortName() + " has too many elements to be loaded. " + arrayDesign - .getCompositeSequences().size() ); + throw new IllegalStateException( "Platform " + arrayDesign.getShortName() + " has too many elements to be loaded. 
" + arrayDesign.getCompositeSequences().size() ); } GeoConverterImpl.log.info( arrayDesign.getCompositeSequences().size() + " elements on the platform" ); return !strictSelection; } - private int processId( GeoPlatform platform, ArrayDesign arrayDesign, String probeOrganismColumn, - ExternalDatabase externalDb, List sequences, List probeOrganism, Taxon - primaryTaxon, - List cloneIdentifiers, List> externalRefs, Iterator descIter, - Pattern refSeqAccessionPattern, boolean strictSelection, List skipped, - Collection compositeSequences, int i, String id ) { + private int processId( GeoPlatform platform, ArrayDesign arrayDesign, String probeOrganismColumn, ExternalDatabase externalDb, List sequences, List probeOrganism, Taxon primaryTaxon, List cloneIdentifiers, List> externalRefs, Iterator descIter, Pattern refSeqAccessionPattern, boolean strictSelection, List skipped, Collection compositeSequences, int i, String id ) { String externalAccession = null; if ( externalRefs != null ) { externalAccession = this.getExternalAccession( externalRefs, i ); @@ -1397,9 +1322,7 @@ private int processId( GeoPlatform platform, ArrayDesign arrayDesign, String pro skipped.add( id ); if ( skipped.size() % 50000 == 0 ) { - GeoConverterImpl.log - .info( "Skipped " + skipped.size() + " elements due to strict selection; last was " - + id ); + GeoConverterImpl.log.info( "Skipped " + skipped.size() + " elements due to strict selection; last was " + id ); } i++; return i; @@ -1423,8 +1346,7 @@ private int processId( GeoPlatform platform, ArrayDesign arrayDesign, String pro } } - if ( descIter != null ) - description = description + " " + descIter.next(); + if ( descIter != null ) description = description + " " + descIter.next(); CompositeSequence cs = CompositeSequence.Factory.newInstance(); String probeName = platform.getProbeNamesInGemma().get( id ); @@ -1434,8 +1356,7 @@ private int processId( GeoPlatform platform, ArrayDesign arrayDesign, String pro GeoConverterImpl.log.debug( "Probe 
retaining original name: " + probeName ); platform.getProbeNamesInGemma().put( id, id ); // must make sure this is populated. } else { - if ( GeoConverterImpl.log.isDebugEnabled() ) - GeoConverterImpl.log.debug( "Found probe: " + probeName ); + if ( GeoConverterImpl.log.isDebugEnabled() ) GeoConverterImpl.log.debug( "Found probe: " + probeName ); } cs.setName( probeName ); @@ -1455,8 +1376,7 @@ private int processId( GeoPlatform platform, ArrayDesign arrayDesign, String pro } BioSequence bs = this.createMinimalBioSequence( probeTaxon ); - this.setBsProps( platform, externalDb, sequences, refSeqAccessionPattern, i, id, externalAccession, - cloneIdentifier, bs ); + this.setBsProps( platform, externalDb, sequences, refSeqAccessionPattern, i, id, externalAccession, cloneIdentifier, bs ); this.checkCs( arrayDesign, externalAccession, cloneIdentifier, cs, probeTaxon, bs ); @@ -1467,20 +1387,17 @@ private int processId( GeoPlatform platform, ArrayDesign arrayDesign, String pro return i; } - private void checkCs( ArrayDesign arrayDesign, String externalAccession, String cloneIdentifier, - CompositeSequence cs, Taxon probeTaxon, BioSequence bs ) { + private void checkCs( ArrayDesign arrayDesign, String externalAccession, String cloneIdentifier, CompositeSequence cs, Taxon probeTaxon, BioSequence bs ) { /* * If we have no basis for describing the sequence, we have to skip it. */ if ( StringUtils.isBlank( externalAccession ) && StringUtils.isBlank( cloneIdentifier ) ) { if ( GeoConverterImpl.log.isDebugEnabled() ) { - GeoConverterImpl.log.debug( "Blank external reference and clone id for " + cs + " on " + arrayDesign - + ", no biological rawGEOString can be added." ); + GeoConverterImpl.log.debug( "Blank external reference and clone id for " + cs + " on " + arrayDesign + ", no biological rawGEOString can be added." 
); } } else if ( probeTaxon == null ) { if ( GeoConverterImpl.log.isDebugEnabled() ) { - GeoConverterImpl.log.debug( "No valid taxon identified for " + cs + " on " + arrayDesign - + ", no biological rawGEOString can be added." ); + GeoConverterImpl.log.debug( "No valid taxon identified for " + cs + " on " + arrayDesign + ", no biological rawGEOString can be added." ); } } else if ( probeTaxon.getId() != null ) { // IF there is no taxon given for probe do not create a biosequence otherwise bombs as there is no taxon @@ -1489,9 +1406,7 @@ private void checkCs( ArrayDesign arrayDesign, String externalAccession, String } } - private void setBsProps( GeoPlatform platform, ExternalDatabase externalDb, List sequences, - Pattern refSeqAccessionPattern, int i, String id, String externalAccession, String cloneIdentifier, - BioSequence bs ) { + private void setBsProps( GeoPlatform platform, ExternalDatabase externalDb, List sequences, Pattern refSeqAccessionPattern, int i, String id, String externalAccession, String cloneIdentifier, BioSequence bs ) { boolean isRefseq = false; // ExternalDB will be null if it's IMAGE (this is really pretty messy, sorry) @@ -1515,9 +1430,7 @@ private void setBsProps( GeoPlatform platform, ExternalDatabase externalDb, List bs.setLength( ( long ) bs.getSequence().length() ); bs.setType( SequenceType.DNA ); bs.setName( id ); - bs.setDescription( - "Sequence from platform " + platform.getGeoAccession() + " provided by manufacturer. " - + ( externalAccession != null ? "Used in leiu of " + externalAccession : "No external accession provided" ) ); + bs.setDescription( "Sequence from platform " + platform.getGeoAccession() + " provided by manufacturer. " + ( externalAccession != null ? 
"Used in leiu of " + externalAccession : "No external accession provided" ) ); } else if ( externalAccession != null && !isRefseq && externalDb != null ) { /* @@ -1549,8 +1462,7 @@ private Collection convertPlatformOrganisms( GeoPlatform platform, String } for ( String taxonScientificName : organisms ) { - if ( taxonScientificName == null ) - continue; + if ( taxonScientificName == null ) continue; taxaOnPlatform.append( ": " ).append( taxonScientificName ); // make sure add scientific name to map for platform if ( taxonScientificNameMap.containsKey( taxonScientificName ) ) { @@ -1624,8 +1536,7 @@ private Taxon convertProbeOrganism( String probeOrganism ) { private void convertPubMedIds( GeoSeries series, ExpressionExperiment expExp ) { Collection ids = series.getPubmedIds(); - if ( ids == null || ids.size() == 0 ) - return; + if ( ids == null || ids.size() == 0 ) return; //noinspection LoopStatementThatDoesntLoop // Usually just one for ( String string : ids ) { @@ -1710,8 +1621,7 @@ private void convertReplicationToFactorValue( GeoReplication replication, Experi * @param experimentalDesign experimental design * @return BA */ - private BioAssay convertSample( GeoSample sample, BioMaterial bioMaterial, ExperimentalDesign - experimentalDesign ) { + private BioAssay convertSample( GeoSample sample, BioMaterial bioMaterial, ExperimentalDesign experimentalDesign ) { if ( sample == null ) { GeoConverterImpl.log.warn( "Null sample" ); return null; @@ -1816,18 +1726,14 @@ private Collection convertSeries( GeoSeries series ) { // get map of platform to dataset. if ( organismDatasetMap.size() > 1 ) { - GeoConverterImpl.log - .warn( "**** Multiple-species series, with multiple datasets. This series will be split into " - + organismDatasetMap.size() + " experiments. ****" ); + GeoConverterImpl.log.warn( "**** Multiple-species series, with multiple datasets. This series will be split into " + organismDatasetMap.size() + " experiments. 
****" ); int i = 1; for ( String organism : organismDatasetMap.keySet() ) { this.convertSpeciesSpecific( series, converted, organismDatasetMap, i, organism ); i++; } } else if ( organismSampleMap.size() > 1 ) { - GeoConverterImpl.log - .warn( "**** Multiple-species series. This series will be split into " + organismSampleMap.size() - + " experiments. ****" ); + GeoConverterImpl.log.warn( "**** Multiple-species series. This series will be split into " + organismSampleMap.size() + " experiments. ****" ); int i = 1; for ( String organism : organismSampleMap.keySet() ) { this.convertSpeciesSpecificSamples( series, converted, organismSampleMap, i, organism ); @@ -1841,8 +1747,7 @@ private Collection convertSeries( GeoSeries series ) { } } else { ExpressionExperiment ee = this.convertSeriesSingle( series ); - if ( ee != null ) - converted.add( ee ); + if ( ee != null ) converted.add( ee ); } return converted; @@ -1875,8 +1780,7 @@ private void convertSeriesDataVectors( GeoSeries geoSeries, ExpressionExperiment * @return ExpressionExperiment, or null if the series cannot be converted (wrong sample type, etc.) */ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { - if ( series == null ) - return null; + if ( series == null ) return null; GeoConverterImpl.log.info( "Converting series: " + series.getGeoAccession() ); Collection dataSets = series.getDataSets(); @@ -1889,8 +1793,7 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { } if ( !this.isUsable( series ) ) { - GeoConverterImpl.log - .info( "Series was not usable: types=" + StringUtils.join( series.getSeriesTypes(), " " ) ); + GeoConverterImpl.log.info( "Series was not usable: types=" + StringUtils.join( series.getSeriesTypes(), " " ) ); return null; } @@ -1912,8 +1815,7 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { expExp.setDescription( series.getSummaries() + ( series.getSummaries().endsWith( "\n" ) ? 
"" : "\n" ) ); if ( series.getLastUpdateDate() != null ) { - expExp.setDescription( - expExp.getDescription() + "At time of import, last updated (by provider) on: " + series.getLastUpdateDate() + "\n" ); + expExp.setDescription( expExp.getDescription() + "At time of import, last updated (by provider) on: " + series.getLastUpdateDate() + "\n" ); } // note that if this was part of a split, makeTitle will already have been called, but that's okay @@ -1972,8 +1874,7 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { } // spits out a big summary of the correspondence. - if ( GeoConverterImpl.log.isDebugEnabled() ) - GeoConverterImpl.log.debug( series.getSampleCorrespondence() ); + if ( GeoConverterImpl.log.isDebugEnabled() ) GeoConverterImpl.log.debug( series.getSampleCorrespondence() ); int numBioMaterials = 0; int numSkippedBioMaterials = 0; /* @@ -1984,13 +1885,11 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { for ( Iterator> iter = series.getSampleCorrespondence().iterator(); iter.hasNext(); ) { Set correspondingSamples = iter.next(); - if ( correspondingSamples.isEmpty() ) - continue; // can happen after removing samples (multitaxon) + if ( correspondingSamples.isEmpty() ) continue; // can happen after removing samples (multitaxon) BioMaterial bioMaterial = BioMaterial.Factory.newInstance(); String bioMaterialName = this.getBiomaterialPrefix( series, ++numBioMaterials ); - StringBuilder bioMaterialDescription = new StringBuilder( - GeoConverterImpl.BIOMATERIAL_DESCRIPTION_PREFIX + series.getGeoAccession() ); + StringBuilder bioMaterialDescription = new StringBuilder( GeoConverterImpl.BIOMATERIAL_DESCRIPTION_PREFIX + series.getGeoAccession() ); // From the series samples, find the sample that corresponds and convert it. 
for ( String cSample : correspondingSamples ) { @@ -2010,16 +1909,14 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { if ( accession.equals( cSample ) ) { if ( seen.contains( accession ) ) { - GeoConverterImpl.log - .error( "Got " + accession + " twice, this time in set " + correspondingSamples ); + GeoConverterImpl.log.error( "Got " + accession + " twice, this time in set " + correspondingSamples ); } seen.add( accession ); BioAssay ba = this.convertSample( sample, bioMaterial, expExp.getExperimentalDesign() ); assert ( ba != null ); - ba.setDescription( ba.getDescription() + "\nSource GEO sample is " + sample.getGeoAccession() - + "\nLast updated (according to GEO): " + sample.getLastUpdateDate() ); + ba.setDescription( ba.getDescription() + "\nSource GEO sample is " + sample.getGeoAccession() + "\nLast updated (according to GEO): " + sample.getLastUpdateDate() ); assert ba.getSampleUsed() != null; bioMaterial.getBioAssaysUsedIn().add( ba ); @@ -2032,8 +1929,7 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { } if ( !found ) { if ( GeoConverterImpl.log.isDebugEnabled() ) - GeoConverterImpl.log.debug( "No sample found in " + series + " to match " + cSample - + "; this can happen if some samples were not run on all platforms." ); + GeoConverterImpl.log.debug( "No sample found in " + series + " to match " + cSample + "; this can happen if some samples were not run on all platforms." ); } } @@ -2046,17 +1942,14 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { } } - GeoConverterImpl.log.info( "Expression Experiment from " + series + " has " + expExp.getBioAssays().size() - + " bioassays and " + ( numBioMaterials - numSkippedBioMaterials ) + " biomaterials." ); + GeoConverterImpl.log.info( "Expression Experiment from " + series + " has " + expExp.getBioAssays().size() + " bioassays and " + ( numBioMaterials - numSkippedBioMaterials ) + " biomaterials." 
); int expectedNumSamples = series.getSamples().size() - samplesToSkip.size(); int actualNumSamples = expExp.getBioAssays().size(); expExp.setNumberOfSamples( actualNumSamples ); if ( expectedNumSamples > actualNumSamples ) { - GeoConverterImpl.log.warn( ( expectedNumSamples - actualNumSamples ) - + " samples were not in the 'sample correspondence'" - + " and have been omitted. Possibly they were in the Series (GSE) but not in the corresponding Dataset (GDS)?" ); + GeoConverterImpl.log.warn( ( expectedNumSamples - actualNumSamples ) + " samples were not in the 'sample correspondence'" + " and have been omitted. Possibly they were in the Series (GSE) but not in the corresponding Dataset (GDS)?" ); } // this is mostly only needed for converting data vectors, which will be confused by the extra ones @@ -2071,8 +1964,7 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { } } else { for ( GeoDataset dataset : dataSets ) { - if ( dataSetsToSkip.contains( dataset.getGeoAccession() ) ) - continue; + if ( dataSetsToSkip.contains( dataset.getGeoAccession() ) ) continue; this.convertDataset( dataset, expExp ); } } @@ -2080,8 +1972,7 @@ private ExpressionExperiment convertSeriesSingle( GeoSeries series ) { return expExp; } - private void convertSpeciesSpecific( GeoSeries series, Collection converted, - Map> organismDatasetMap, int i, String organism ) { + private void convertSpeciesSpecific( GeoSeries series, Collection converted, Map> organismDatasetMap, int i, String organism ) { GeoSeries speciesSpecific = new GeoSeries(); Collection datasets = organismDatasetMap.get( organism ); @@ -2118,16 +2009,13 @@ private void convertSpeciesSpecific( GeoSeries series, Collection converted, - Map> organismSampleMap, int i, String organism ) { + private void convertSpeciesSpecificSamples( GeoSeries series, Collection converted, Map> organismSampleMap, int i, String organism ) { GeoSeries speciesSpecific = new GeoSeries(); @@ -2169,8 +2057,7 @@ private void 
convertSpeciesSpecificSamples( GeoSeries series, Collection datasetSamples, GeoPlatform geoPlatform ) { + private void convertVectorsForPlatform( GeoValues values, ExpressionExperiment expExp, List datasetSamples, GeoPlatform geoPlatform ) { assert datasetSamples.size() > 0 : "No samples in dataset"; if ( !geoPlatform.useDataFromGeo() ) { // see bug 4181 - GeoConverterImpl.log - .warn( "Platform characteristics indicate data from GEO should be ignored or will not be present anyway (" - + geoPlatform + ")" ); + GeoConverterImpl.log.warn( "Platform characteristics indicate data from GEO should be ignored or will not be present anyway (" + geoPlatform + ")" ); return; } - GeoConverterImpl.log - .info( "Converting vectors for " + geoPlatform.getGeoAccession() + ", " + datasetSamples.size() - + " samples." ); + GeoConverterImpl.log.info( "Converting vectors for " + geoPlatform.getGeoAccession() + ", " + datasetSamples.size() + " samples." ); BioAssayDimension bioAssayDimension = this.convertGeoSampleList( datasetSamples, expExp ); @@ -2424,15 +2302,12 @@ private void convertVectorsForPlatform( GeoValues values, ExpressionExperiment e int columnAccordingToSample = quantitationTypes.indexOf( quantitationType ); int quantitationTypeIndex = values.getQuantitationTypeIndex( geoPlatform, quantitationType ); - GeoConverterImpl.log.debug( "Processing " + quantitationType + " (column=" + quantitationTypeIndex - + " - according to sample, it's " + columnAccordingToSample + ")" ); + GeoConverterImpl.log.debug( "Processing " + quantitationType + " (column=" + quantitationTypeIndex + " - according to sample, it's " + columnAccordingToSample + ")" ); - Map> dataVectors = this - .makeDataVectors( values, datasetSamples, quantitationTypeIndex ); + Map> dataVectors = this.makeDataVectors( values, datasetSamples, quantitationTypeIndex ); if ( dataVectors == null || dataVectors.size() == 0 ) { - GeoConverterImpl.log - .debug( "No data for " + quantitationType + " (column=" + 
quantitationTypeIndex + ")" ); + GeoConverterImpl.log.debug( "No data for " + quantitationType + " (column=" + quantitationTypeIndex + ")" ); continue; } GeoConverterImpl.log.info( dataVectors.size() + " data vectors for " + quantitationType ); @@ -2443,38 +2318,30 @@ private void convertVectorsForPlatform( GeoValues values, ExpressionExperiment e qt.setName( quantitationType ); String description = quantitationTypeDescriptions.get( columnAccordingToSample ); qt.setDescription( description ); - QuantitationTypeParameterGuesser - .guessQuantitationTypeParameters( qt, quantitationType, description, exampleValue ); + QuantitationTypeParameterGuesser.guessQuantitationTypeParameters( qt, quantitationType, description, exampleValue ); int count = 0; int skipped = 0; for ( String designElementName : dataVectors.keySet() ) { List dataVector = dataVectors.get( designElementName ); - if ( dataVector == null || dataVector.size() == 0 ) - continue; + if ( dataVector == null || dataVector.size() == 0 ) continue; - RawExpressionDataVector vector = this - .convertDesignElementDataVector( geoPlatform, expExp, bioAssayDimension, designElementName, - dataVector, qt ); + RawExpressionDataVector vector = this.convertDesignElementDataVector( geoPlatform, expExp, bioAssayDimension, designElementName, dataVector, qt ); if ( vector == null ) { skipped++; if ( GeoConverterImpl.log.isDebugEnabled() ) - GeoConverterImpl.log - .debug( "Null vector for DE=" + designElementName + " QT=" + quantitationType ); + GeoConverterImpl.log.debug( "Null vector for DE=" + designElementName + " QT=" + quantitationType ); continue; } if ( GeoConverterImpl.log.isTraceEnabled() ) { - GeoConverterImpl.log - .trace( designElementName + " " + qt.getName() + " " + qt.getRepresentation() + " " - + dataVector.size() + " elements in vector" ); + GeoConverterImpl.log.trace( designElementName + " " + qt.getName() + " " + qt.getRepresentation() + " " + dataVector.size() + " elements in vector" ); } 
expExp.getRawExpressionDataVectors().add( vector ); - if ( ++count % GeoConverterImpl.LOGGING_VECTOR_COUNT_UPDATE == 0 && GeoConverterImpl.log - .isDebugEnabled() ) { + if ( ++count % GeoConverterImpl.LOGGING_VECTOR_COUNT_UPDATE == 0 && GeoConverterImpl.log.isDebugEnabled() ) { GeoConverterImpl.log.debug( count + " Data vectors added" ); } } @@ -2485,17 +2352,14 @@ private void convertVectorsForPlatform( GeoValues values, ExpressionExperiment e GeoConverterImpl.log.debug( count + " Data vectors added for '" + quantitationType + "'" ); } } else { - GeoConverterImpl.log.info( "No vectors were retained for " + quantitationType - + " -- usually this is due to all values being missing." ); + GeoConverterImpl.log.info( "No vectors were retained for " + quantitationType + " -- usually this is due to all values being missing." ); } if ( skipped > 0 ) { GeoConverterImpl.log.info( "Skipped " + skipped + " vectors" ); } } - GeoConverterImpl.log - .info( "Total of " + expExp.getRawExpressionDataVectors().size() + " vectors on platform " + geoPlatform - + ", " + expExp.getQuantitationTypes().size() + " quantitation types." ); + GeoConverterImpl.log.info( "Total of " + expExp.getRawExpressionDataVectors().size() + " vectors on platform " + geoPlatform + ", " + expExp.getQuantitationTypes().size() + " quantitation types." 
); } private DatabaseEntry createDatabaseEntry( ExternalDatabase externalDb, String externalRef, BioSequence bs ) { @@ -2520,23 +2384,18 @@ private ArrayDesign createMinimalArrayDesign( GeoPlatform platform ) { arrayDesign.setShortName( platform.getGeoAccession() ); arrayDesign.setDescription( platform.getDescriptions() ); PlatformType technology = platform.getTechnology(); - if ( technology == PlatformType.dualChannel || technology == PlatformType.dualChannelGenomic - || technology == PlatformType.spottedOligonucleotide || technology == PlatformType.spottedDNAOrcDNA ) { + if ( technology == PlatformType.dualChannel || technology == PlatformType.dualChannelGenomic || technology == PlatformType.spottedOligonucleotide || technology == PlatformType.spottedDNAOrcDNA ) { arrayDesign.setTechnologyType( TechnologyType.TWOCOLOR ); - } else if ( technology == PlatformType.singleChannel || technology == PlatformType.oligonucleotideBeads - || technology == PlatformType.inSituOligonucleotide ) { + } else if ( technology == PlatformType.singleChannel || technology == PlatformType.oligonucleotideBeads || technology == PlatformType.inSituOligonucleotide ) { arrayDesign.setTechnologyType( TechnologyType.ONECOLOR ); } else if ( technology == null ) { - GeoConverterImpl.log - .warn( "No technology type available for " + platform + ", provisionally setting to 'other'" ); + GeoConverterImpl.log.warn( "No technology type available for " + platform + ", provisionally setting to 'other'" ); arrayDesign.setTechnologyType( TechnologyType.OTHER ); } else if ( technology.equals( PlatformType.MPSS ) ) { arrayDesign.setTechnologyType( TechnologyType.SEQUENCING ); - } else if ( technology.equals( PlatformType.SAGE ) || technology.equals( PlatformType.SAGENlaIII ) || technology - .equals( PlatformType.SAGERsaI ) || technology.equals( PlatformType.SAGESau3A ) ) { + } else if ( technology.equals( PlatformType.SAGE ) || technology.equals( PlatformType.SAGENlaIII ) || technology.equals( 
PlatformType.SAGERsaI ) || technology.equals( PlatformType.SAGESau3A ) ) { arrayDesign.setTechnologyType( TechnologyType.SEQUENCING ); - } else if ( technology - .equals( PlatformType.other ) ) { + } else if ( technology.equals( PlatformType.other ) ) { // We don't know.... arrayDesign.setTechnologyType( TechnologyType.OTHER ); } else { @@ -2561,8 +2420,7 @@ private String determinePlatformDescriptionColumn( GeoPlatform platform ) { int index = 0; for ( String string : columnNames ) { if ( GeoConstants.likelyProbeDescription( string ) ) { - GeoConverterImpl.log.debug( string + " appears to indicate the probe descriptions in column " + index - + " for platform " + platform ); + GeoConverterImpl.log.debug( string + " appears to indicate the probe descriptions in column " + index + " for platform " + platform ); return string; } index++; @@ -2574,8 +2432,7 @@ private String determinePlatformDescriptionColumn( GeoPlatform platform ) { private ExternalDatabase determinePlatformExternalDatabase( GeoPlatform platform ) { ExternalDatabase result = ExternalDatabase.Factory.newInstance(); - Collection likelyExternalDatabaseIdentifiers = this - .determinePlatformExternalReferenceIdentifier( platform ); + Collection likelyExternalDatabaseIdentifiers = this.determinePlatformExternalReferenceIdentifier( platform ); String dbIdentifierDescription = this.getDbIdentifierDescription( platform ); String url; @@ -2592,8 +2449,7 @@ private ExternalDatabase determinePlatformExternalDatabase( GeoPlatform platform } String likelyExternalDatabaseIdentifier = likelyExternalDatabaseIdentifiers.iterator().next(); - if ( likelyExternalDatabaseIdentifier.equals( "GB_ACC" ) || likelyExternalDatabaseIdentifier.equals( "GB_LIST" ) - || likelyExternalDatabaseIdentifier.toLowerCase().equals( "genbank" ) ) { + if ( likelyExternalDatabaseIdentifier.equals( "GB_ACC" ) || likelyExternalDatabaseIdentifier.equals( "GB_LIST" ) || likelyExternalDatabaseIdentifier.toLowerCase().equals( "genbank" ) ) { if ( 
genbank == null ) { if ( externalDatabaseService != null ) { genbank = externalDatabaseService.findByName( "Genbank" ); @@ -2629,9 +2485,7 @@ private Collection determinePlatformExternalReferenceIdentifier( GeoPlat Collection matches = new HashSet<>(); for ( String string : columnNames ) { if ( GeoConstants.likelyExternalReference( string ) ) { - GeoConverterImpl.log - .debug( string + " appears to indicate a possible external reference identifier in column " - + index + " for platform " + platform ); + GeoConverterImpl.log.debug( string + " appears to indicate a possible external reference identifier in column " + index + " for platform " + platform ); matches.add( string ); } @@ -2657,8 +2511,7 @@ private String determinePlatformProbeOrganismColumn( GeoPlatform platform ) { int index = 0; for ( String columnName : columnNames ) { if ( GeoConstants.likelyProbeOrganism( columnName ) ) { - GeoConverterImpl.log.debug( "'" + columnName + "' appears to indicate the sequences in column " + index - + " for platform " + platform ); + GeoConverterImpl.log.debug( "'" + columnName + "' appears to indicate the sequences in column " + index + " for platform " + platform ); return columnName; } index++; @@ -2672,8 +2525,7 @@ private String determinePlatformSequenceColumn( GeoPlatform platform ) { int index = 0; for ( String columnName : columnNames ) { if ( GeoConstants.likelySequence( columnName ) ) { - GeoConverterImpl.log.debug( "'" + columnName + "' appears to indicate the sequences in column " + index - + " for platform " + platform ); + GeoConverterImpl.log.debug( "'" + columnName + "' appears to indicate the sequences in column " + index + " for platform " + platform ); return columnName; } index++; @@ -2691,12 +2543,10 @@ private void doFallback( BioMaterial bioMaterial, String value, String defaultDe bioMaterial.getCharacteristics().add( gemmaChar ); } - private FactorValue findMatchingExperimentalFactorValue( Collection experimentalFactors, - FactorValue 
convertVariableToFactorValue ) { + private FactorValue findMatchingExperimentalFactorValue( Collection experimentalFactors, FactorValue convertVariableToFactorValue ) { Collection characteristics = convertVariableToFactorValue.getCharacteristics(); if ( characteristics.size() > 1 ) - throw new UnsupportedOperationException( - "Can't handle factor values with multiple characteristics in GEO conversion" ); + throw new UnsupportedOperationException( "Can't handle factor values with multiple characteristics in GEO conversion" ); Characteristic c = characteristics.iterator().next(); FactorValue matchingFactorValue = null; @@ -2735,17 +2585,13 @@ private Collection getDatasetSamples( GeoDataset geoDataset ) { for ( GeoSample sample : seriesSamples ) { if ( geoDataset.getColumnNames().contains( sample.getGeoAccession() ) ) { if ( GeoConverterImpl.log.isDebugEnabled() ) { - GeoConverterImpl.log - .debug( "Dataset " + geoDataset + " includes sample " + sample + " on platform " + sample - .getPlatforms().iterator().next() ); + GeoConverterImpl.log.debug( "Dataset " + geoDataset + " includes sample " + sample + " on platform " + sample.getPlatforms().iterator().next() ); } datasetSamples.add( sample ); } if ( GeoConverterImpl.log.isDebugEnabled() ) { - GeoConverterImpl.log - .debug( "Dataset " + geoDataset + " DOES NOT include sample " + sample + " on platform " - + sample.getPlatforms().iterator().next() ); + GeoConverterImpl.log.debug( "Dataset " + geoDataset + " DOES NOT include sample " + sample + " on platform " + sample.getPlatforms().iterator().next() ); } } @@ -2783,8 +2629,7 @@ private Map> getOrganismDatasetMap( GeoSeries series for ( GeoSample sample : series.getSamples() ) { assert sample.getPlatforms().size() > 0 : sample + " has no platform"; - assert sample.getPlatforms().size() == 1 : sample + " has multiple platforms: " - + StringUtils.join( sample.getPlatforms().toArray(), "," ); + assert sample.getPlatforms().size() == 1 : sample + " has multiple platforms: 
" + StringUtils.join( sample.getPlatforms().toArray(), "," ); String organism = sample.getPlatforms().iterator().next().getOrganisms().iterator().next(); if ( !organisms.containsKey( organism ) ) { @@ -2826,8 +2671,7 @@ private Map> getPlatformDatasetMap( GeoSeries s if ( series.getDataSets() == null || series.getDataSets().size() == 0 ) { for ( GeoSample sample : series.getSamples() ) { assert sample.getPlatforms().size() > 0 : sample + " has no platform"; - assert sample.getPlatforms().size() == 1 : sample + " has multiple platforms: " - + StringUtils.join( sample.getPlatforms().toArray(), "," ); + assert sample.getPlatforms().size() == 1 : sample + " has multiple platforms: " + StringUtils.join( sample.getPlatforms().toArray(), "," ); GeoPlatform platform = sample.getPlatforms().iterator().next(); if ( platforms.get( platform ) == null ) { @@ -2857,12 +2701,10 @@ private GeoPlatform getPlatformForSamples( List datasetSamples ) { Collection platforms = sample.getPlatforms(); assert platforms.size() != 0; if ( platforms.size() > 1 ) { - throw new UnsupportedOperationException( - "Can't handle GEO sample ids associated with multiple platforms just yet" ); + throw new UnsupportedOperationException( "Can't handle GEO sample ids associated with multiple platforms just yet" ); } GeoPlatform nextPlatform = platforms.iterator().next(); - if ( platform == null ) - platform = nextPlatform; + if ( platform == null ) platform = nextPlatform; else if ( !platform.equals( nextPlatform ) ) throw new IllegalArgumentException( "All samples here must use the same platform" ); } @@ -2888,8 +2730,7 @@ private Collection getSeriesSamplesForDataset( GeoDataset geoDataset for ( GeoSeries series2 : series ) { if ( series2.getSamples() != null && series2.getSamples().size() > 0 ) { if ( found ) { - throw new IllegalStateException( - "More than one of the series for " + geoDataset + " has samples: " + series2 ); + throw new IllegalStateException( "More than one of the series for " + 
geoDataset + " has samples: " + series2 ); } seriesSamples = series2.getSamples(); found = true; @@ -2916,8 +2757,7 @@ private void handleMissing( List toConvert, PrimitiveType pt ) { } else if ( pt.equals( PrimitiveType.BOOLEAN ) ) { toConvert.add( false ); } else { - throw new UnsupportedOperationException( - "Missing values in data vectors of type " + pt + " not supported" ); + throw new UnsupportedOperationException( "Missing values in data vectors of type " + pt + " not supported" ); } } @@ -2969,8 +2809,7 @@ private boolean isPopulated( Map> dataVectors ) { */ private boolean isUsable( GeoSeries series ) { - return series.getSeriesTypes().contains( SeriesType.geneExpressionByArray ) || series.getSeriesTypes() - .contains( SeriesType.geneExpressionBySequencing ); + return series.getSeriesTypes().contains( SeriesType.geneExpressionByArray ) || series.getSeriesTypes().contains( SeriesType.geneExpressionBySequencing ); } @@ -2983,8 +2822,7 @@ private boolean isUsable( GeoSeries series ) { * @return A map of Strings (design element names) to Lists of Strings containing the data. 
* @throws IllegalArgumentException if the columnNumber is not valid */ - private Map> makeDataVectors( GeoValues values, List datasetSamples, - Integer quantitationTypeIndex ) { + private Map> makeDataVectors( GeoValues values, List datasetSamples, Integer quantitationTypeIndex ) { Map> dataVectors = new HashMap<>( GeoConverterImpl.INITIAL_VECTOR_CAPACITY ); Collections.sort( datasetSamples ); GeoPlatform platform = this.getPlatformForSamples( datasetSamples ); @@ -2992,8 +2830,7 @@ private Map> makeDataVectors( GeoValues values, List> makeDataVectors( GeoValues values, List ob = values.getValues( platform, quantitationTypeIndex, designElementName, indices ); - if ( ob == null || ob.size() == 0 ) - continue; + if ( ob == null || ob.size() == 0 ) continue; assert ob.size() == datasetSamples.size(); dataVectors.put( designElementName, ob ); } @@ -3020,24 +2856,19 @@ private Map> makeDataVectors( GeoValues values, List experimentalFactors, GeoReplication replication ) { + private void matchSampleReplicationToExperimentalFactorValue( BioMaterial bioMaterial, Collection experimentalFactors, GeoReplication replication ) { // find the experimentalFactor that matches this. 
FactorValue convertVariableToFactorValue = this.convertReplicationToFactorValue( replication ); - FactorValue matchingFactorValue = this - .findMatchingExperimentalFactorValue( experimentalFactors, convertVariableToFactorValue ); + FactorValue matchingFactorValue = this.findMatchingExperimentalFactorValue( experimentalFactors, convertVariableToFactorValue ); if ( matchingFactorValue != null ) { bioMaterial.getFactorValues().add( matchingFactorValue ); } else { - throw new IllegalStateException( - "Could not find matching factor value for " + replication + " in experimental design for sample " - + bioMaterial ); + throw new IllegalStateException( "Could not find matching factor value for " + replication + " in experimental design for sample " + bioMaterial ); } } @@ -3048,32 +2879,26 @@ private void matchSampleReplicationToExperimentalFactorValue( BioMaterial bioMat * for the * BioAssay */ - private boolean matchSampleToBioAssay( ExpressionExperiment expExp, BioAssayDimension bioAssayDimension, - String sampleAcc ) { + private boolean matchSampleToBioAssay( ExpressionExperiment expExp, BioAssayDimension bioAssayDimension, String sampleAcc ) { for ( BioAssay bioAssay : expExp.getBioAssays() ) { if ( sampleAcc.equals( bioAssay.getAccession().getAccession() ) ) { bioAssayDimension.getBioAssays().add( bioAssay ); - GeoConverterImpl.log - .debug( "Found sample match for bioAssay " + bioAssay.getAccession().getAccession() ); + GeoConverterImpl.log.debug( "Found sample match for bioAssay " + bioAssay.getAccession().getAccession() ); return true; } } return false; } - private void matchSampleVariableToExperimentalFactorValue( BioMaterial bioMaterial, - Collection experimentalFactors, GeoVariable variable ) { + private void matchSampleVariableToExperimentalFactorValue( BioMaterial bioMaterial, Collection experimentalFactors, GeoVariable variable ) { // find the experimentalFactor that matches this. 
FactorValue convertVariableToFactorValue = this.convertVariableToFactorValue( variable ); - FactorValue matchingFactorValue = this - .findMatchingExperimentalFactorValue( experimentalFactors, convertVariableToFactorValue ); + FactorValue matchingFactorValue = this.findMatchingExperimentalFactorValue( experimentalFactors, convertVariableToFactorValue ); if ( matchingFactorValue == null ) { - throw new IllegalStateException( - "Could not find matching factor value for " + variable + " in experimental design for sample " - + bioMaterial ); + throw new IllegalStateException( "Could not find matching factor value for " + variable + " in experimental design for sample " + bioMaterial ); } // make sure we don't put the factor value on more than once. @@ -3096,8 +2921,7 @@ private void sanityCheckQuantitationTypes( List datasetSamples ) { for ( GeoSample sample : datasetSamples ) { if ( sample.hasUsableData() ) { reference = sample.getColumnNames(); - if ( !reference.isEmpty() ) - break; + if ( !reference.isEmpty() ) break; } else { expectingData = false; } @@ -3137,8 +2961,7 @@ private void sanityCheckQuantitationTypes( List datasetSamples ) { } } if ( someDidntMatch ) { - GeoConverterImpl.log - .warn( "Samples do not have consistent quantification type names. Last error was: " + lastError ); + GeoConverterImpl.log.warn( "Samples do not have consistent quantification type names. 
Last error was: " + lastError ); } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoFamilyParser.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoFamilyParser.java index 94695f8807..11342a95eb 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoFamilyParser.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/geo/GeoFamilyParser.java @@ -98,7 +98,7 @@ public Collection getResults() { @Override public void parse( File f ) throws IOException { - try (InputStream a = new FileInputStream( f )) { + try ( InputStream a = new FileInputStream( f ) ) { this.parse( a ); } } @@ -113,7 +113,7 @@ public void parse( InputStream is ) throws IOException { throw new IOException( "No bytes to read from the input stream." ); } - try (final BufferedReader dis = new BufferedReader( new InputStreamReader( is ) )) { + try ( final BufferedReader dis = new BufferedReader( new InputStreamReader( is ) ) ) { GeoFamilyParser.log.debug( "Parsing...." ); @@ -172,7 +172,7 @@ public Exception call() { @Override public void parse( String fileName ) throws IOException { - try (InputStream is = FileTools.getInputStreamFromPlainOrCompressedFile( fileName )) { + try ( InputStream is = FileTools.getInputStreamFromPlainOrCompressedFile( fileName ) ) { this.parse( is ); } } @@ -441,7 +441,7 @@ private int extractChannelNumber( String line ) { /** * Turns a line in the format #key = value into a column name and description. This is used to handle lines such as * (in a platform section of a GSE file): - * + * *
      * #SEQ_LEN = Sequence length
      * 
@@ -672,7 +672,7 @@ private void initializeQuantitationTypes() { GeoFamilyParser.log .debug( "Data column " + columnName + " will be skipped for " + this.currentSample() + " - it is an 'unwanted' quantitation type (column number " + currentIndex - .get( platformForSample ) + .get( platformForSample ) + ", " + desiredColumnNumber + "the quantitation type.)" ); } else { @@ -717,15 +717,15 @@ else if ( object instanceof GeoSample ) * Parse the column identifier strings from a GDS or GSE file. * In GSE files, in a 'platform' section, these become column descriptions for the platform descriptors. * For samples in GSE files, they become values for the data in the sample. For example - * + * *
      * #ID_REF = probe id
      * #VALUE = RMA value
      * 
- * + * * In GDS files, if we are in a 'dataset' section, these become "titles" for the samples if they aren't already * provided. Here is an example. - * + * *
      * #GSM549 = Value for GSM549: lexA vs. wt, before UV treatment, MG1655; src: 0' wt, before UV treatment, 25 ug total RNA, 2 ug pdN6<->0' lexA, before UV 25 ug total RNA, 2 ug pdN6
      * #GSM542 = Value for GSM542: lexA 20' after NOuv vs. 0', MG1655; src: 0', before UV treatment, 25 ug total RNA, 2 ug pdN6<->lexA 20 min after NOuv, 25 ug total RNA, 2 ug pdN6
@@ -1441,6 +1441,8 @@ private void sampleSetLibSource( String accession, String string ) {
             sample.setLibSource( "transcriptomic" );
         } else if ( string.equalsIgnoreCase( "genomic" ) ) {
             sample.setLibSource( "genomic" );
+        } else if ( string.equalsIgnoreCase( "transcriptomic single cell" ) ) {
+            sample.setLibSource( "transcriptomic single cell" );
         } else {
             throw new IllegalArgumentException( "Unknown library source: " + string );
         }
diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java
index b9959c3105..282aa9b7dd 100644
--- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java
+++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java
@@ -438,13 +438,13 @@ public void testConvertGSE60() throws Exception {
      */
     @SuppressWarnings("unchecked")
     @Test
-    public void testConvertGSE8134() throws Exception {
+    public void testConvertGSE235534() throws Exception {
         InputStream is = new GZIPInputStream(
-                new ClassPathResource( "/data/loader/expression/geo/GSE8134_family.soft.gz" ).getInputStream() );
+                new ClassPathResource( "/data/loader/expression/geo/GSE235534_family.soft.gz" ).getInputStream() );
         GeoFamilyParser parser = new GeoFamilyParser();
         parser.parse( is );
 
-        GeoSeries series = ( ( GeoParseResult ) parser.getResults().iterator().next() ).getSeriesMap().get( "GSE8134" );
+        GeoSeries series = ( ( GeoParseResult ) parser.getResults().iterator().next() ).getSeriesMap().get( "GSE235534" );
         DatasetCombiner datasetCombiner = new DatasetCombiner();
         GeoSampleCorrespondence correspondence = datasetCombiner.findGSECorrespondence( series );
         series.setSampleCorrespondence( correspondence );
@@ -452,8 +452,10 @@ public void testConvertGSE8134() throws Exception {
         assertNotNull( result );
         Collection ees = ( Collection ) result;
         assertEquals( 1, ees.size() );
+        assertEquals( 6, ees.iterator().next().getBioAssays().size() );
     }
 
+
     /*
      * Case where the same sample can be in multiple series, we had problems with it.
      */
@@ -775,7 +777,7 @@ public void testParseGSE44625() throws Exception {
         boolean found1 = false;
         for ( BioAssay ba : ee.getBioAssays() ) {
             for ( Characteristic c : ba.getSampleUsed().getCharacteristics() ) {
-               // log.info( c );
+                // log.info( c );
                 String category = c.getCategory();
                 if ( !category.equals( "molecular entity" ) && !category.equals( "labelling" ) && !category.equals( "BioSource" ) ) { // we lose these original strings, or they have diff format; not important.
                     assertNotNull( c.getOriginalValue() );
diff --git a/gemma-core/src/test/resources/data/loader/expression/geo/GSE235534_family.soft.gz b/gemma-core/src/test/resources/data/loader/expression/geo/GSE235534_family.soft.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d80efdeb21b7ab09bd9356e5b2b67f91e9b58ad2
GIT binary patch
literal 7346
zcmZu!bwHGBv!_cMiDf}j>6Y#m>F#cUrE@7kKxtUIbLs8|l`bjiZV(WWX5pgec)oM*
z{dV?`XXcsTZ{~@2=6xw5(BOXm5RWnqJr?IpH|Svo3}l%Y1W__!4Y@&KA^^~
ziALj8;jBJ0%6w1J3kGDgTxi^XFESTwMD?I`^u19xJT2tA=6quLwB;?Eh@ziotklNL
zEkwC0YO9i>7nJeYcl>%{QQi6eR3RhVpFMtQs3&UoCYzy`Hrm?p;sWI82<+r?0(SB`
zTpbs1VDEs<0VKSGB}YDPsIZ$Rmr06yXLlEGAD7*p09`QQqsNQNjN#m7_f7N$ymP|TOG0b2
zTk{`R>~6c0#KD5%mhl%l$1+@;T-4T|bB|<|_QuRQO)bAIX~;uSr|q+^bpn9VhZidA1p
zcPjhxes$0LC@<V1src
zm%jArY56Qe;}@3F2&nuAy<~14tmJL>&uQW>G?7~*M&ZPaYDMeA-{ZD
z@}kGgm-Hw-y7cW~%V}oy=0?tFTkKD|q1z>>efmu}o#sRd#S5baiHmNWX_X?za9wtJ
zA3v;QWhC{
zHfef0u^%~J0Etzx_H5GFd
zPY%n)5$(s-hQtDw5=HI0{YXKR%2cbVb1^iNuD(ds9FHxirGU|!R}#=-jvbo38y0ux
zCo82viqTgZW7##aS%~*-?=~GH?<#+S#%->~hcmfdP#@KN
z3frJO%|VZb!kg3&f?2V=pC|#!R_Is+gz{vqdW~XJAhEL(vCH6&Zs&Nw`HwdDBfmTz
zOkz=11YFIS9l+u>nX0z%(lu_R%+~eu{S}BI;P|fAHA%`vE-gmwxP9?@z&SwIqbN=|
zr?1A-gY{bq!gEWQ$|A%a|;0nW&29J)}g>xgj
zzZ9j8S%Gt-!)b7Z`boooQ@(;L^iLW<9kZ>EZvRCw>X`L;oS6YvXrD9~9dlzxzwEb;
zZAyW^Go^QADuP$ateJBZ@HWqzg0Xz6aq$^{hE!mH5Q0mmE$b+{A}?V!ifvwK5b(6W
zDESiqY4eYtTc2h+o?bW&Kv)c$nB@%~sIlcNr+meScKK*KQk%0R5F_3na#C`Dr#Yw%
zt$I!4Qk4?v(d3?z&6|p;*ZQ%|Qykkn5~-NQ_>=y?
zs_^gfb`m|K%pD9rd$>OM(p?2;pt=pE{1$5E$HT6+rtnaKwA=A&xwVL4`V!WG9COUz
zdcDd!P06blxy+8gyhU2PE_K#-71Pg%w%?RKAQzEmfaf=kkE^v9lku><&W2Tcr!yZX
zK56JON74vOo%$|oYAzNaY~)9boT5T^hkdPV$*1JnrF#$zz@x6yBb8zk(R;~VpKJ``
zUQ{gGaVeTTS?(fRG}Z_YHac6W*YIXZNHg%Z9mZn)xW@Z&iZ{eE3w#V{M)F`>U+LXm
z9mm13VWniaj){l~n>`UR%pyGStfALy4e~hf9!&U{&X;Vw!F1iW$CJJ>6J|pCejuym
zU3yhi+cC@gWnP0CY7f3AAv1&(&>0Sb6@7v-r6vo_@|n$ds8)#uT&IX^B(CL_J`08t
z$`OL0paKqwS*znvXEU%&5GC1SCs1)eZy%p)N`i~kpLMhciJvZ;ku`ZIwB6(>6!dXT
zi!Nt))CCMBcku@MJGF2Nj#k-^)h>tcY!wdgz=ON=7c*w|lemk{$Con%0;e&@O+c=}
z7oDx*@=3`RI%m{%4aSQ(Gy9CB2^dUi%Pt>@S8}w&tcfXMV;+^Z@zf)Jj$yV$susF18M7*qMNoIKF^S*atP;B~Euc
z|A^aK=m?1MB=;LB+Zjq;@KYuE|(U@Y*b9c4jlC-KQN+8#`_>H;w3`+Z{FyY=90+CMK%axZ2Nr@
zY@M1IAd^?-d#fiFRnqkQ38L8;(>J4n*#N^;Ih1)&I7@2)k!-eJ&>fys9lCXyRK9gW
zDOJ@09_FXv-7z_~vftD6&(0<1RRb}ojm9m_+a0-^I>-+^f@7OziwV(#qK<-uzMN85
z)uj0DEv^`DF=l_53V4W_l58lmxb=*{$F%Isgr-%5o<8f8dotm>RhlD{sD2t2-v6X*
zzA~^H^@HeE1q?`TQ8z1?!WWmCXvs`%CsT$+~mxS6@{zDcJa9BEzRbUcUhs0*+7O;^6}
zvH^x7Rw(5sYFErGuC4-gUtWjpQ_JM99yI`o2$L`l&Y%Y6c|?warw4o?7j7!a=3%>P
zsvf)TdhwRCAUC}XLln}4iUnC#ANLh|wZ+pu`{MpP5Hmw^8g_z!L-+7Y@HEO(zm7KE
zp$cx#!T=R)UqyDRQa=8Ma~_si<&e$%z&8>!ucO#sE77n`2ts^BRa)BWojV3R88!=D
zd&0|XdZ%%!@(8E2EAt2;8s&L}1R522gzDvO;ac@SzA)->oKI@jo7HZY*%Xe*m)Y#&
z(>rFrc=JPZ{D}slN~NcpN@s3~oPe#ETXYn_GEVFR3HPP7^NW<$F__z%Gc9yIjMBD*
zb!s`h?eiEMNPF}kxZwHt;xbI@A!oU~_LbT(EP`7LWv=xZJv#eXOy;Y9J^+q*d6?QQC1
z@w;JQgsbxTGmx%{xV>Yv!#@Y8W6
zFQzcS^~k6yK-&w6nim=N||HV0hk$<&THOrZ@7+*
zws~s#$76JF)rbQs{vh$Uh~U8%D{7mQ?ztP*^Y@H5k2)q1yL}l6(IFoGFK1pWi<6+_
z24yd}cH^vQIHyz)B^-_Q^2ptbH=iSA=yiNPk79uH>4-_v6YtPyf_FE*r((Lzts?w_
z0@qh@Lh;XZBu&Nk#18jd3+sZL`S&^aCCKP}Z|X(PuiYjnwLMGiEK9tfVH2aWX;Vy*
z7w)8>r4S0X5H-Ipt~`ecEkB>Uye1Sjho%jphwR7PmRTH$R_JZYaymhY!W
z!3@hLZp^sBt+`%&wW^vpmM+H&`|EoN{$1DBBIRiblLkD`7$5RHi@Cbwv>*?IF@_?1
zLxZT|_zYAcNxoigr9CAv&rb1RGo>sKV)1UW4iVskJg7
zNW@j;JH&qF+`S#-J&c(@Z|n}9?Wb%ULi;nAfz}&ioX=EHa~DJ0T++N4qg5YtpRyQA
z{3u@9j0p{c{a6A(+|`g0}<6h@_P80+M9-oC~Y!Ti#e
z_6`p*oOCNjLP}=`&V37<6rsSiEh6$-V*66nCWb%Wj@~6`x_~brAQ9nZZ&v$Y1Z~@v
z{ivpz^{z=}ycSb!tE<^coX?eTVM6_b^y12A@Jn+hqipn9~NvBp{=54yWyNNBXClGae{
zZ8h~*CTynZWI@{=R?iQ@B`I$X)OY0$+#<$OS8y?}mU?@)@;V{8ME!hgkiKf0H6?~d
zK}cZnhloLM8KD&6tV|7RcbV03>muD;XpfM7?K*~)_nohYmw4T=1}do~gn6?y4a0sL3IO#*dz6Tsk+w9=Xey8S?9i_YPJy*O8~F=7CWu{McflN?<#e?YqiqFN
zwg=^T5%pD+W?0y>yu}SJ^*p%~l>3fgY3W*4E=8ZW7+Wr)Wn4em`Upod)!4xauBuAz
z@0;f5{3I6N-x-VsZo0YR!=8<-l+-p8)dA_cgz)Qjxt1A6?!QCyL&ezzQ^-fKtx)TN
z=X<`HT(EF~$r0cF8hZSmsZ8xKv~*^%;>9lWp|u(ZYjD?Hc6wwldud|+2ldO;~2EX}tQh(B2w7h6lkNGE8Y^&LkmGa-K>?aZZdzGCZ
zMcPhZi~K9EN0M8Q)Q;nVKMDL=J7FD=Lgz}xYLyq;FyL{a{aV*;a)-~z9NQr{HUVnc
z$_TZH^KZY2c~A<*!g{H|UG%e8T;9{pUQ~*BLyS5JQj65Z`~%KVmN&sM}_k87cmC&P*vC`oGUq{6aOQ
z1Jd*n-|AsOeG@C<|81{FS+Q^TpR0gvRb_Ia^cXYccgz&w?0qe>i?GZ2{{p-aKtcG=
zDI0Qu+x|Mx=KEJ8S(n9}b?W_H{Ouw_dVXQcUI2>v`*s`HJqugZ7;t0b2Z8&pTIzLO
zs8rPEmuCy|+tPIt&v6w_s+eQc9*+`k$9o(b3B_Z|ANwUtla`g)fGcl2K0@Npb
z%4IG^CE4LBznX(R&xNA`%ANSbYVN{%3~_&yLHPMHDN1v-A!C0ft+rk46dsjZ(Nv^j
z^M|mD$gB6C@8)JLL;?D!46Q1Zu9oti->L8n4nF)m2Eg#J3CCZ4_TKQ~d{3e&lbc|b
zB+0LKsIFT>xkp>@hlm|w^qm0vLm#J
zcpK|5S^(p3;BLq)PP})0>Lr<0_`#8IMn_SzNK5_UVfpm=ZBVns<($`Wa5E@(MBMKdq+p
zGQb^=Ik3fT7|&U29Z0xh*;uAE97J>Jq*~1buzRCott3c4ZR&ewKzA&JMi06Mwo9`E
z^?NF$L0DcTuKdkVbvyDxm%x>%#=w$_$4AGs)Oo;*8ydXs(;g$W_Tr^B{RJgTw
zdvAmt2Off+B0_97_ildB48n1DErSvQUBCDS25Qe$@vJzY!UqS6cT<|+vZIk8-MOJR
zgAOr~FcDCGo7sW`e+9s+cDB0gLvJ>GvF4%FUEzRREgpN0ltHkshQRZM=pXM#lJ8T`
z*F02N1OHH7A>%vMJzifVd$PA3&Irc|=*
zxECA7n_o5+&m5a{H&#JYTdNL24@;w|)pz0rrs+syv%&}tI*16Ob;{H%h1@C}#UvV-
zj3|Twj*Ldgu3bbC5q0fEbXuyCaFOtRaOQy=-B-v#&cDi)i9Ol7c4zj+ArpIBp_x^=
zg}&g4<#taIv_2eB;3Fjo;vWQw$z6MQv*tAJZ#;qj4mQ)VE{@
zrJA%S(m`Esd~qYSy=)e|GD#o(Q2vmd)*)<(M6$T_rAq|jNU~-~78jUFdypPRAy01@l
z2uQc7lo~0>04U1VT^O30zSf_@sX4{KQ3qC;1V{DsnV45|%#qk*^7vcXFR_H9{jH$@
zyZCrkc;8Ai1DmcJ#GaM=+xlM
zbDTKetZlJbA*7z!C1`jfG$k;u0c0}^j>?`uO$+SpHZ$Rp?vLXTHU{-&yOiVOI(FY!^NH9uQ@x~cQ($DaWX4KAzjQ(Ev#
z!z}tTgIfzcG9_!`+o~~lzfFJHR(gsX=e3a^)6qO}+x*)ALMlu1>q(a?Gv&lI#Es$7U6g6olpSI#_NQq>-m|iyNWvtuFGt~6m
z0Zy$qX+z=|66`^kLF<`sj2u2}H=jh)ae#9qj{=?p@{A&}-=&In_+`fT@gIpr6Vo-Yqv6cZYW>9}~0j#`C_haq51
zwc|Efg$xQ}4O1swQ&ZF2xJhZ`vf@q*`iohO({P2;&Ow2Fi0%izAvkuru9CFuo0J>(zwEoHY|X@cw?bXj}yi1wL3lB;Uof67Oy0GsGQpEa+m@blEp%=)|{nP}59XSO-
z0Uo-F0jts(@F**@8V;CS;B51!Y>z*0iWNiF3&|EvvUN8MsJ`A{(
z_Luo88;6@0l;WJzf#oP?ntm4^(t6zNPwT@^mJxK{mWydoGQ|=OU|2X1KL`9CAlH7S
mvHR8X2Hs@kv^iTGCB!vAT+A`zR-?-waPUi@WVB8QaQ_29np7hI

literal 0
HcmV?d00001


From c23ca32f8a2e16c2e4d96237731bbcd5276b13a6 Mon Sep 17 00:00:00 2001
From: Guillaume Poirier-Morency 
Date: Mon, 10 Jun 2024 12:00:25 -0700
Subject: [PATCH 57/81] rest: Improve additional servers displayed in the dev
 spec

---
 .../ubic/gemma/rest/util/OpenApiConfig.java    | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/OpenApiConfig.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/OpenApiConfig.java
index bc975f5abd..4c58e8361b 100644
--- a/gemma-rest/src/main/java/ubic/gemma/rest/util/OpenApiConfig.java
+++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/OpenApiConfig.java
@@ -16,6 +16,13 @@
 @Configuration
 public class OpenApiConfig {
 
+    private final Server[] additionalServers = {
+            new Server().url( "https://gemma.msl.ubc.ca/rest/v2" ),
+            new Server().url( "https://gemma-staging.msl.ubc.ca/rest/v2" ),
+            new Server().url( "https://dev.gemma.msl.ubc.ca/rest/v2" ),
+            new Server().url( "http://localhost:8080/rest/v2" )
+    };
+
     @Value("${gemma.hosturl}")
     private String hostUrl;
 
@@ -23,12 +30,15 @@ public class OpenApiConfig {
     public FactoryBean openApi( CustomModelResolver customModelResolver, Environment environment ) {
         OpenApiFactory factory = new OpenApiFactory( "ubic.gemma.rest" );
         ArrayList servers = new ArrayList<>();
-        servers.add( new Server().url( hostUrl + "/rest/v2" ) );
+        String mainServerUrl = hostUrl + "/rest/v2";
+        servers.add( new Server().url( mainServerUrl ) );
         if ( environment.acceptsProfiles( EnvironmentProfiles.DEV ) ) {
             // provide additional servers for development
-            servers.add( new Server().url( "http://localhost:8080/rest/v2" ) );
-            servers.add( new Server().url( "https://gemma-staging.msl.ubc.ca/rest/v2" ) );
-            servers.add( new Server().url( "https://dev.gemma.msl.ubc.ca/rest/v2" ) );
+            for ( Server additionalServer : additionalServers ) {
+                if ( !additionalServer.getUrl().equals( mainServerUrl ) ) {
+                    servers.add( additionalServer );
+                }
+            }
         }
         factory.setServers( servers );
         factory.setModelConverters( Collections.singletonList( customModelResolver ) );

From 13e34fda27fb559ac7e188e11468bb46e9941dec Mon Sep 17 00:00:00 2001
From: Guillaume Poirier-Morency 
Date: Mon, 10 Jun 2024 10:58:26 -0700
Subject: [PATCH 58/81] Fix timezone handling when parsing Maven datetime from
 buildinfo

The timezone is represented in UTC.
---
 .../main/java/ubic/gemma/core/util/BuildInfo.java  | 14 +++++++++-----
 .../WebApplicationExceptionMapperTest.java         |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/BuildInfo.java b/gemma-core/src/main/java/ubic/gemma/core/util/BuildInfo.java
index a0e3d8cf5d..998a2b6692 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/util/BuildInfo.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/util/BuildInfo.java
@@ -4,7 +4,6 @@
 import org.springframework.beans.factory.InitializingBean;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.io.ClassPathResource;
-import org.springframework.format.datetime.DateFormatter;
 import org.springframework.stereotype.Component;
 
 import javax.annotation.Nullable;
@@ -13,15 +12,22 @@
 import java.io.InputStream;
 import java.text.DateFormat;
 import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.Locale;
 import java.util.Properties;
+import java.util.TimeZone;
 
 @Component
 @CommonsLog
 public class BuildInfo implements InitializingBean {
 
-    private static final String MAVEN_DATETIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";
+    private static final DateFormat MAVEN_DATETIME_PATTERN;
+
+    static {
+        MAVEN_DATETIME_PATTERN = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ENGLISH );
+        MAVEN_DATETIME_PATTERN.setTimeZone( TimeZone.getTimeZone( "UTC" ) );
+    }
 
     /**
      * Retrieve build information directly from the classpath.
@@ -64,9 +70,7 @@ private BuildInfo( String version, String timestampAsString, String gitHash ) {
     public void afterPropertiesSet() {
         if ( timestampAsString != null ) {
             try {
-                timestamp = new DateFormatter( MAVEN_DATETIME_PATTERN )
-                        .parse( timestampAsString, Locale.getDefault() );
-
+                timestamp = MAVEN_DATETIME_PATTERN.parse( timestampAsString );
             } catch ( ParseException e ) {
                 log.error( "Failed to parse build timestamp.", e );
             }
diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/providers/WebApplicationExceptionMapperTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/providers/WebApplicationExceptionMapperTest.java
index c6bb319ba9..1344fbaa67 100644
--- a/gemma-rest/src/test/java/ubic/gemma/rest/providers/WebApplicationExceptionMapperTest.java
+++ b/gemma-rest/src/test/java/ubic/gemma/rest/providers/WebApplicationExceptionMapperTest.java
@@ -116,7 +116,7 @@ public void testJsonRepresentation() {
                 .extracting( "response" )
                 .extracting( "entity" )
                 .asInstanceOf( InstanceOfAssertFactories.INPUT_STREAM )
-                .hasContent( String.format( "{\"error\":{\"code\":400,\"message\":\"test\"},\"apiVersion\":\"%s\",\"buildInfo\":{\"version\":\"1.0.0\",\"timestamp\":\"2024-05-20T11:41:58.000+00:00\",\"gitHash\":\"1234\"}}",
+                .hasContent( String.format( "{\"error\":{\"code\":400,\"message\":\"test\"},\"apiVersion\":\"%s\",\"buildInfo\":{\"version\":\"1.0.0\",\"timestamp\":\"2024-05-20T04:41:58.000+00:00\",\"gitHash\":\"1234\"}}",
                         version ) );
     }
 }
\ No newline at end of file

From 9c02d159fcddadbe9817136a66ae705385ad7b3a Mon Sep 17 00:00:00 2001
From: Guillaume Poirier-Morency 
Date: Tue, 11 Jun 2024 11:05:56 -0700
Subject: [PATCH 59/81] Properly format the build info timestamp

Use a