Skip to content

Commit

Permalink
refactor(utils.py/json): docstrings, combine lines, add return type #52
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewtavis committed Oct 30, 2023
1 parent ad07bb1 commit 951bf86
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 143 deletions.
192 changes: 67 additions & 125 deletions src/scribe_data/resources/language_meta_data.json
Original file line number Diff line number Diff line change
@@ -1,128 +1,70 @@
{
"used by": "Scribe-Data/src/scribe_data/utils.py",
"description": {
"entry": {
"language": "the supported language. All lowercase",
"iso": "the ISO 639 code for 'language'. See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes . All lowercase",
"qid": "the unique identifier of 'language' on Wikidata. 'Q' followed by one or more digits. See https://www.wikidata.org/wiki/Q43649390",
"remove-words": "words that should not be included as autosuggestions for the given language.",
"ignore-words": "TODO. Case sensitive."
}
"used by": "Scribe-Data/src/scribe_data/utils.py",
"description": {
"entry": {
"language": "the supported language. All lowercase",
"iso": "the ISO 639 code for 'language'. See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes . All lowercase",
"qid": "the unique identifier of 'language' on Wikidata. 'Q' followed by one or more digits. See https://www.wikidata.org/wiki/Q43649390",
"remove-words": "words that should not be included as autosuggestions for the given language.",
"ignore-words": "TODO. Case sensitive."
}
},
"languages": [
{
"language": "english",
"iso": "en",
"qid": "Q1860",
"remove-words": ["of", "the", "The", "and"],
"ignore-words": []
},
"languages": [
{
"language": "english",
"iso": "en",
"qid": "Q1860",
"remove-words": [
"of",
"the",
"The",
"and"
],
"ignore-words": []
},
{
"language": "french",
"iso": "fr",
"qid": "Q150",
"remove-words": [
"of",
"the",
"The",
"and"
],
"ignore-words": [
"XXe"
]
},
{
"language": "german",
"iso": "de",
"qid": "Q188",
"remove-words": [
"of",
"the",
"The",
"and",
"NeinJa",
"et",
"redirect"
],
"ignore-words": [
"Gemeinde",
"Familienname"
]
},
{
"language": "italian",
"iso": "it",
"qid": "Q652",
"remove-words": [
"of",
"the",
"The",
"and",
"text",
"from"
],
"ignore-words": [
"The",
"ATP"
]
},
{
"language": "portuguese",
"iso": "pt",
"qid": "Q5146",
"remove-words": [
"of",
"the",
"The",
"and",
"jbutadptflora"
],
"ignore-words": []
},
{
"language": "russian",
"iso": "ru",
"qid": "Q7737",
"remove-words": [
"of",
"the",
"The",
"and"
],
"ignore-words": []
},
{
"language": "spanish",
"iso": "es",
"qid": "Q1321",
"remove-words": [
"of",
"the",
"The",
"and"
],
"ignore-words": []
},
{
"language": "swedish",
"iso": "sv",
"qid": "Q9027",
"remove-words": [
"of",
"the",
"The",
"and",
"Checklist",
"Catalogue"
],
"ignore-words": [
"databasdump"
]
}
]
{
"language": "french",
"iso": "fr",
"qid": "Q150",
"remove-words": ["of", "the", "The", "and"],
"ignore-words": ["XXe"]
},
{
"language": "german",
"iso": "de",
"qid": "Q188",
"remove-words": ["of", "the", "The", "and", "NeinJa", "et", "redirect"],
"ignore-words": ["Gemeinde", "Familienname"]
},
{
"language": "italian",
"iso": "it",
"qid": "Q652",
"remove-words": ["of", "the", "The", "and", "text", "from"],
"ignore-words": ["The", "ATP"]
},
{
"language": "portuguese",
"iso": "pt",
"qid": "Q5146",
"remove-words": ["of", "the", "The", "and", "jbutadptflora"],
"ignore-words": []
},
{
"language": "russian",
"iso": "ru",
"qid": "Q7737",
"remove-words": ["of", "the", "The", "and"],
"ignore-words": []
},
{
"language": "spanish",
"iso": "es",
"qid": "Q1321",
"remove-words": ["of", "the", "The", "and"],
"ignore-words": []
},
{
"language": "swedish",
"iso": "sv",
"qid": "Q9027",
"remove-words": ["of", "the", "The", "and", "Checklist", "Catalogue"],
"ignore-words": ["databasdump"]
}
]
}
28 changes: 10 additions & 18 deletions src/scribe_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,15 @@ def _find(source_key: str, source_value: str, target_key: str, error_msg: str):
ValueError: when a source_value is not supported.
Returns:
the 'target' value
The 'target' value given the passed arguments.
"""
norm_source_value = source_value.lower()

target_value = [
if target_value := [
entry[target_key]
for entry in _languages
if entry[source_key] == norm_source_value
]

if target_value:
]:
assert len(target_value) == 1, f"More than one entry for '{norm_source_value}'"
return target_value[0]

Expand Down Expand Up @@ -188,8 +186,7 @@ def get_language_from_iso(iso: str) -> str:

def get_language_words_to_remove(language: str) -> list[str]:
"""
Returns the words that should not be included as autosuggestions for the given
language.
Returns the words that should be removed during the data cleaning process for the given language.
Parameters
----------
Expand All @@ -199,8 +196,7 @@ def get_language_words_to_remove(language: str) -> list[str]:
Returns
-------
list[str]
The words that should not be included as autosuggestions for the given
language
The words that should be removed during the data cleaning process for the given language.
"""
return _find(
"language",
Expand All @@ -212,8 +208,7 @@ def get_language_words_to_remove(language: str) -> list[str]:

def get_language_words_to_ignore(language: str) -> list[str]:
"""
Returns the words that should not be included as autosuggestions for the given
language.
Returns the words that should not be included as autosuggestions for the given language.
Parameters
----------
Expand All @@ -223,8 +218,7 @@ def get_language_words_to_ignore(language: str) -> list[str]:
Returns
-------
list[str]
The words that should not be included as autosuggestions for the given
language
The words that should not be included as autosuggestions for the given language.
"""
return _find(
"language",
Expand All @@ -241,7 +235,7 @@ def get_path_from_format_file() -> str:
return "../../../../../.."


def get_path_from_load_dir():
def get_path_from_load_dir() -> str:
"""
Returns the directory path from the load directory to scribe-org.
"""
Expand Down Expand Up @@ -363,8 +357,7 @@ def check_and_return_command_line_args(
all_args, first_args_check=None, second_args_check=None
):
"""
Checks command line arguments passed to Scribe-Data files and returns them if
correct.
Checks command line arguments passed to Scribe-Data files and returns them if correct.
Parameters
----------
Expand All @@ -380,8 +373,7 @@ def check_and_return_command_line_args(
Returns
-------
first_args, second_args: list(str)
The subset of possible first and second arguments that have been verified
as being valid.
The subset of possible first and second arguments that have been verified as being valid.
"""
if len(all_args) == 1:
return None, None
Expand Down

0 comments on commit 951bf86

Please sign in to comment.