Skip to content

Commit

Permalink
update initialise nltk functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ColinDaglish committed Jul 11, 2023
1 parent 42de250 commit 4de3692
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/modules/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def get_total_feature_count(features: DataFrame) -> DataFrame:
return total_feature_count


def retrieve_named_entities(series: Series) -> list[list[str]]:
def retrieve_named_entities(series: Series) -> list:
"""retrieve any named entities from the series
Parameters
----------
Expand Down
39 changes: 34 additions & 5 deletions src/modules/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,40 @@ def lemmatizer(tokens: list) -> list:


def _initialise_nltk_component(extension: str, download_object: str) -> None:
    """Splitter function to determine which initialisation path to run.

    Parameters
    ----------
    extension: str
        the filepath extension leading to where the model is saved
        (only forwarded on the non-linux path)
    download_object: str
        the object to download from nltk

    Returns
    -------
    None
    """
    # Dispatch on the running platform: linux gets its own initialiser,
    # every other platform falls through to the windows initialiser.
    if sys.platform.startswith("linux"):
        _initialise_nltk_linux(download_object)
    else:
        _initialise_nltk_windows(extension, download_object)


def _initialise_nltk_linux(download_object: str) -> None:
    """Initialise nltk component for linux environment (for github actions).

    Parameters
    ----------
    download_object: str
        nltk object to download

    Returns
    -------
    None
    """
    nltk.download(download_object)
    # Register the extra search location only once, so repeated
    # initialisation calls do not grow nltk.data.path with duplicates.
    # NOTE(review): this path is relative to the current working
    # directory - confirm it resolves to the runner's nltk_data folder.
    runner_data_dir = "../home/runner/nltk_data"
    if runner_data_dir not in nltk.data.path:
        nltk.data.path.append(runner_data_dir)
    return None


def _initialise_nltk_windows(extension: str, download_object: str):
"""initialise nltk component for a windows environment
Parameters
----------
extension: str
Expand All @@ -211,10 +244,6 @@ def _initialise_nltk_component(extension: str, download_object: str):
path = "C:/Users/" + username + "/AppData/Roaming/nltk_data/" + extension
if not os.path.exists(path):
nltk.download(download_object)
# Set path for runs on github actions
if sys.platform.startswith("linux"):
nltk.data.path.append("../home/runner/nltk_data")
else:
nltk.data.path.append("../local_packages/nltk_data")
return None

Expand Down

0 comments on commit 4de3692

Please sign in to comment.