From a495b4c0d0db65de67c843d85877810506733898 Mon Sep 17 00:00:00 2001
From: Vincent Emonet
Date: Tue, 19 Sep 2023 17:26:51 +0200
Subject: [PATCH] stop spark context to prevent memory leaks

---
 src/openpredict_model/train.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/openpredict_model/train.py b/src/openpredict_model/train.py
index d75c1f2..744806a 100644
--- a/src/openpredict_model/train.py
+++ b/src/openpredict_model/train.py
@@ -462,6 +462,7 @@ def calculateCombinedSimilarity(pairs_train, pairs_test, classes_train, classes_
         test_df = sparkBuildFeatures(spark_context, pairs_test, classes_test,
                                      knownDrugDis_bc.value, drug_df_bc.value, disease_df_bc.value)
         log.info("Finishing Spark jobs 🏁")
+        spark_context.stop()
     else:
         log.info("Spark cluster not found, using pandas 🐼")
         train_df = createFeatureDF(
@@ -569,6 +570,7 @@ def createFeaturesSparkOrDF(pairs, classes, drug_df, disease_df):
         feature_df = sparkBuildFeatures(
             spark_context, pairs, classes, knownDrugDis_bc.value, drug_df_bc.value, disease_df_bc.value)
         log.info("Finishing Spark jobs 🏁")
+        spark_context.stop()
     else:
         log.info("Spark cluster not found, using pandas 🐼")
         feature_df = createFeatureDF(
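
Note (not part of the patch above): the added spark_context.stop() calls run only on the
success path; if sparkBuildFeatures raised, the context would still leak. A minimal sketch
of an exception-safe variant, assuming spark_context is the live pyspark SparkContext and
reusing the function and variable names from train.py:

    from pyspark import SparkContext

    spark_context = SparkContext.getOrCreate()
    try:
        # Build the feature DataFrames on the Spark cluster (names taken from train.py).
        train_df = sparkBuildFeatures(spark_context, pairs_train, classes_train,
                                      knownDrugDis_bc.value, drug_df_bc.value, disease_df_bc.value)
        test_df = sparkBuildFeatures(spark_context, pairs_test, classes_test,
                                     knownDrugDis_bc.value, drug_df_bc.value, disease_df_bc.value)
        log.info("Finishing Spark jobs 🏁")
    finally:
        # Always release executors and driver-side resources, even on failure,
        # so repeated training runs do not accumulate leaked contexts.
        spark_context.stop()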