comment out the spark part
vemonet committed Sep 19, 2023
1 parent a495b4c · commit d4a8f56
Showing 1 changed file with 29 additions and 29 deletions.
58 changes: 29 additions & 29 deletions src/openpredict_model/train.py
@@ -159,37 +159,37 @@ def get_spark_context():
     :return: Spark context
     """
-    spark_master_url = os.getenv('SPARK_MASTER_URL')
     sc = None
+    # spark_master_url = os.getenv('SPARK_MASTER_URL')
+
+    # if os.getenv('SPARK_HOME'):
+    #     # Do not try to run Spark if SPARK_HOME env variable not set
+    #     # import findspark
+    #     # findspark.init(os.getenv('SPARK_HOME'))
+    #     # sc = pyspark.SparkContext(appName="Pi", master='local[*]')
+
+    #     if spark_master_url and sc is None:
+    #         log.info(
+    #             'SPARK_MASTER_URL provided, connecting to the Spark cluster ✨')
+    #         # e.g. spark://my-spark-spark-master:7077
+    #         sc = pyspark.SparkContext(appName="Pi", master=spark_master_url)
+    #         log.info(sc)
+    #     else:
+    #         # Most of the time use local Spark available in docker container
+    #         try:
+    #             log.info(
+    #                 'SPARK_MASTER_URL not provided, trying to start Spark locally ✨')
+    #             sc = pyspark.SparkContext.getOrCreate()
+    #             # sc = pyspark.SparkContext(appName="Pi", master='local[*]')
+    #             log.info(sc)
+    #         except Exception as e:
+    #             log.warning(e)
+    #             log.info(
+    #                 "⚠️ Could not start a Spark cluster locally. Using pandas to handle dataframes 🐼")
 
-    if os.getenv('SPARK_HOME'):
-        # Do not try to run Spark if SPARK_HOME env variable not set
-        # import findspark
-        # findspark.init(os.getenv('SPARK_HOME'))
-        # sc = pyspark.SparkContext(appName="Pi", master='local[*]')
-
-        if spark_master_url and sc is None:
-            log.info(
-                'SPARK_MASTER_URL provided, connecting to the Spark cluster ✨')
-            # e.g. spark://my-spark-spark-master:7077
-            sc = pyspark.SparkContext(appName="Pi", master=spark_master_url)
-            log.info(sc)
-        else:
-            # Most of the time use local Spark available in docker container
-            try:
-                log.info(
-                    'SPARK_MASTER_URL not provided, trying to start Spark locally ✨')
-                sc = pyspark.SparkContext.getOrCreate()
-                # sc = pyspark.SparkContext(appName="Pi", master='local[*]')
-                log.info(sc)
-            except Exception as e:
-                log.warning(e)
-                log.info(
-                    "⚠️ Could not start a Spark cluster locally. Using pandas to handle dataframes 🐼")
-
-    else:
-        log.info(
-            'SPARK_HOME environment variable not found, using pandas to handle dataframes 🐼')
+    # else:
+    #     log.info(
+    #         'SPARK_HOME environment variable not found, using pandas to handle dataframes 🐼')
     return sc
     # Old way:
     # import findspark
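
For context on what this change does: with the whole Spark block commented out, get_spark_context() now always returns None, so every caller ends up on the pandas code path the 🐼 log messages refer to. A minimal caller-side sketch of that fallback, assuming the module path openpredict_model.train and purely illustrative data (the DataFrame and the parallelize call below are not from this diff):

import pandas as pd

from openpredict_model.train import get_spark_context

sc = get_spark_context()
if sc is not None:
    # Spark path: distribute the computation across the cluster.
    # After this commit sc is always None, so this branch is effectively dead code.
    rdd = sc.parallelize(range(100))
    total = rdd.sum()
else:
    # Pandas path: handle the dataframe locally 🐼
    df = pd.DataFrame({"value": range(100)})
    total = int(df["value"].sum())

print(total)  # 4950 either way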
