Skip to content

Commit

Permalink
Merge branch 'scribe-org:main' into Hindi
Browse files: browse the repository at this point in the history
  • Loading branch information
KesharwaniArpita authored Oct 8, 2024
2 parents c49e220 + dd056df commit 4515f51
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 27 deletions.
2 changes: 1 addition & 1 deletion src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_data(

if all:
print("Updating all languages and data types ...")
query_data(None, None, overwrite)
query_data(None, None, None, overwrite)
subprocess_result = True

# MARK: Emojis
Expand Down
54 changes: 52 additions & 2 deletions src/scribe_data/wikidata/query_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

import json
import os
import subprocess
import sys
from pathlib import Path
from urllib.error import HTTPError

Expand All @@ -33,6 +35,52 @@
from scribe_data.wikidata.wikidata_utils import sparql


def execute_formatting_script(formatting_file_path, output_dir):
    """
    Executes a formatting script given a filepath and output directory for the process.

    Parameters
    ----------
        formatting_file_path : str
            The formatting file to run.

        output_dir : str
            The output directory path for results.

    Returns
    -------
        The results of the formatting script saved in the given output directory.

    Raises
    ------
        subprocess.CalledProcessError
            If the formatting script exits with a non-zero status.
    """
    # Run the script with the interpreter that is executing this module so that the child
    # process sees the same environment (virtualenv, installed packages). A bare "python3"
    # may be absent from PATH or point at a different installation. Fall back to it only
    # when the running interpreter cannot report its own path.
    python_executable = sys.executable or "python3"
    command = [python_executable, str(formatting_file_path), "--file-path", output_dir]

    if not sys.platform.startswith("win"):
        # Unix-like systems (Linux, macOS): the child inherits the environment unchanged.
        subprocess.run(command, check=True)
        return

    # Two levels up from this file.
    project_root = Path(__file__).parent.parent

    # Windows: prepend that directory to PYTHONPATH in a copy of the environment.
    # NOTE(review): only Windows receives the PYTHONPATH update - confirm that other
    # platforms really do not need it.
    env = os.environ.copy()
    inherited_pythonpath = env.get("PYTHONPATH")
    env["PYTHONPATH"] = (
        f"{project_root}{os.pathsep}{inherited_pythonpath}"
        if inherited_pythonpath
        else str(project_root)
    )

    subprocess.run(command, env=env, check=True)


def query_data(
languages: str = None,
data_type: str = None,
Expand Down Expand Up @@ -64,7 +112,7 @@ def query_data(
PATH_TO_LANGUAGE_EXTRACTION_FILES = (
SCRIBE_DATA_SRC_PATH / "language_data_extraction"
)

languages = [lang.capitalize() for lang in languages]
current_languages = list(language_metadata["languages"])
current_data_type = ["nouns", "verbs", "prepositions"]

Expand Down Expand Up @@ -262,7 +310,9 @@ def query_data(
/ target_type
/ f"format_{target_type}.py"
)
os.system(f"python3 {formatting_file_path} --file-path {output_dir}")
execute_formatting_script(
formatting_file_path=formatting_file_path, output_dir=output_dir
)


if __name__ == "__main__":
Expand Down
123 changes: 99 additions & 24 deletions tests/cli/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,102 @@
from scribe_data.cli.get import get_data


class TestCLIGetCommand(unittest.TestCase):
    """Legacy test for the CLI ``get`` command; its only test is skipped."""

    # NOTE(review): the patch targets the ``get_data`` attribute of the
    # ``scribe_data.cli.get`` module, while the body calls the bare name ``get_data``.
    # If that name was imported directly into this test module, the mock would never
    # record the calls - presumably the reason for the skip; confirm before re-enabling.
    @unittest.skip("Mocking doesn't work as expected.")
    def test_get_command(self):
        """Invoke ``get_data`` twice under a patch and inspect the recorded calls."""
        single_request = {
            "language": "English",
            "data_type": "nouns",
            "output_dir": "tests_output",
            "output_type": "json",
        }

        with patch("scribe_data.cli.get.get_data") as recorder:
            # One targeted request followed by an "everything" request.
            get_data(**single_request)
            get_data(all=True)

            # Both invocations are expected to have been recorded.
            assert recorder.call_count == 2

            # First recorded call.
            first_args, first_kwargs = recorder.mock_calls[0]
            self.assertEqual(first_args, ("English", "nouns", "tests_output"))
            self.assertFalse(first_kwargs.get("all"))

            # Last recorded call.
            last_args, last_kwargs = recorder.mock_calls[-1]
            self.assertIsNone(last_args)
            self.assertTrue(last_kwargs["all"])
class TestGetData(unittest.TestCase):
    """Checks how ``get_data`` forwards its arguments to the functions it calls."""

    def _expect_query_call(self, request, expected):
        """
        Run ``get_data(**request)`` with ``query_data`` patched out and verify that the
        patched function was called exactly once with the keyword arguments in ``expected``.
        """
        with patch("scribe_data.cli.get.query_data") as query_mock:
            get_data(**request)
            query_mock.assert_called_once_with(**expected)

    # MARK: Subprocess Patching

    def test_get_emoji_keywords(self):
        """Requesting emoji keywords triggers a ``subprocess.run`` call."""
        with patch("subprocess.run") as run_mock:
            get_data(language="English", data_type="emoji-keywords")
            self.assertTrue(run_mock.called)

    # MARK: Invalid Arguments

    def test_invalid_arguments(self):
        """Calling ``get_data`` with no arguments raises ``ValueError``."""
        self.assertRaises(ValueError, get_data)

    # MARK: All Data

    def test_get_all_data(self):
        """``all=True`` forwards three ``None`` values and ``False`` positionally."""
        with patch("scribe_data.cli.get.query_data") as query_mock:
            get_data(all=True)
            query_mock.assert_called_once_with(None, None, None, False)

    # MARK: Language and Data Type

    def test_get_specific_language_and_data_type(self):
        """A language, data type and output directory are passed through."""
        self._expect_query_call(
            request={
                "language": "german",
                "data_type": "nouns",
                "output_dir": "./test_output",
            },
            expected={
                "languages": ["german"],
                "data_type": ["nouns"],
                "output_dir": "./test_output",
                "overwrite": False,
            },
        )

    # MARK: Capitalized Language

    def test_get_data_with_capitalized_language(self):
        """A capitalized language name is forwarded as given."""
        self._expect_query_call(
            request={"language": "German", "data_type": "nouns"},
            expected={
                "languages": ["German"],
                "data_type": ["nouns"],
                "output_dir": "scribe_data_json_export",
                "overwrite": False,
            },
        )

    # MARK: Lowercase Language

    def test_get_data_with_lowercase_language(self):
        """A lowercase language name is forwarded as given."""
        self._expect_query_call(
            request={"language": "german", "data_type": "nouns"},
            expected={
                "languages": ["german"],
                "data_type": ["nouns"],
                "output_dir": "scribe_data_json_export",
                "overwrite": False,
            },
        )

    # MARK: Output Directory

    def test_get_data_with_different_output_directory(self):
        """A custom output directory is forwarded unchanged."""
        self._expect_query_call(
            request={
                "language": "german",
                "data_type": "nouns",
                "output_dir": "./custom_output_test",
            },
            expected={
                "languages": ["german"],
                "data_type": ["nouns"],
                "output_dir": "./custom_output_test",
                "overwrite": False,
            },
        )

    # MARK: Overwrite is True

    def test_get_data_with_overwrite_true(self):
        """``overwrite=True`` is forwarded to ``query_data``."""
        self._expect_query_call(
            request={"language": "English", "data_type": "verbs", "overwrite": True},
            expected={
                "languages": ["English"],
                "data_type": ["verbs"],
                "output_dir": "scribe_data_json_export",
                "overwrite": True,
            },
        )

    # MARK: Overwrite is False

    def test_get_data_with_overwrite_false(self):
        """``overwrite=False`` is forwarded together with a custom output directory."""
        self._expect_query_call(
            request={
                "language": "English",
                "data_type": "verbs",
                "overwrite": False,
                "output_dir": "./custom_output_test",
            },
            expected={
                "languages": ["English"],
                "data_type": ["verbs"],
                "output_dir": "./custom_output_test",
                "overwrite": False,
            },
        )

0 comments on commit 4515f51

Please sign in to comment.