From 6a461968a5ce9aa3fa81ca01657fb91e7fb9e3b4 Mon Sep 17 00:00:00 2001 From: axif Date: Mon, 7 Oct 2024 15:14:58 +0600 Subject: [PATCH 1/4] add test and fix capitalization --- src/scribe_data/cli/get.py | 2 +- src/scribe_data/wikidata/query_data.py | 2 +- tests/cli/test_get.py | 107 ++++++++++++++++++++----- 3 files changed, 88 insertions(+), 23 deletions(-) diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py index 8f5abb43..323404e1 100644 --- a/src/scribe_data/cli/get.py +++ b/src/scribe_data/cli/get.py @@ -92,7 +92,7 @@ def get_data( if all: print("Updating all languages and data types ...") - query_data(None, None, overwrite) + query_data(None, None, None, overwrite) subprocess_result = True # MARK: Emojis diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py index 4082b3a2..afd12e41 100644 --- a/src/scribe_data/wikidata/query_data.py +++ b/src/scribe_data/wikidata/query_data.py @@ -64,7 +64,7 @@ def query_data( PATH_TO_LANGUAGE_EXTRACTION_FILES = ( SCRIBE_DATA_SRC_PATH / "language_data_extraction" ) - + languages = [lang.capitalize() for lang in languages] current_languages = list(language_metadata["languages"]) current_data_type = ["nouns", "verbs", "prepositions"] diff --git a/tests/cli/test_get.py b/tests/cli/test_get.py index c96f6a7c..9269fbac 100644 --- a/tests/cli/test_get.py +++ b/tests/cli/test_get.py @@ -22,31 +22,96 @@ import unittest from unittest.mock import patch - from scribe_data.cli.get import get_data -class TestCLIGetCommand(unittest.TestCase): - @unittest.skip("Mocking doesn't work as expected.") - def test_get_command(self): - with patch("scribe_data.cli.get.get_data") as mock_get_data: - # Call the function you're testing - get_data( - language="English", - data_type="nouns", - output_dir="tests_output", - output_type="json", - ) +class TestGetData(unittest.TestCase): + # MARK: Test for subprocess patching + @patch("subprocess.run") + def test_get_emoji_keywords(self, mock_subprocess_run): + get_data(language="English", data_type="emoji-keywords") + self.assertTrue(mock_subprocess_run.called) + + # MARK: Test for invalid arguments + def test_invalid_arguments(self): + with self.assertRaises(ValueError): + get_data() + + # MARK: Test for getting all data + @patch("scribe_data.cli.get.query_data") + def test_get_all_data(self, mock_query_data): + get_data(all=True) + mock_query_data.assert_called_once_with(None, None, None, False) + + # MARK: Test for specific language and data type + @patch("scribe_data.cli.get.query_data") + def test_get_specific_language_and_data_type(self, mock_query_data): + get_data(language="german", data_type="nouns", output_dir="./test_output") + mock_query_data.assert_called_once_with( + languages=["german"], + data_type=["nouns"], + output_dir="./test_output", + overwrite=False, + ) + + # MARK: Test for capitalized language input + @patch("scribe_data.cli.get.query_data") + def test_get_data_with_capitalized_language(self, mock_query_data): + get_data(language="German", data_type="nouns") + mock_query_data.assert_called_once_with( + languages=["German"], + data_type=["nouns"], + output_dir="scribe_data_json_export", + overwrite=False, + ) - get_data(all=True) + # MARK: Test for lowercase language input + @patch("scribe_data.cli.get.query_data") + def test_get_data_with_lowercase_language(self, mock_query_data): + get_data(language="german", data_type="nouns") + mock_query_data.assert_called_once_with( + languages=["german"], + data_type=["nouns"], + output_dir="scribe_data_json_export", + overwrite=False, + ) - # Validate the calls. - assert mock_get_data.call_count == 2 + # MARK: Test for different output directory + @patch("scribe_data.cli.get.query_data") + def test_get_data_with_different_output_directory(self, mock_query_data): + get_data( + language="german", data_type="nouns", output_dir="./custom_output_test" + ) + mock_query_data.assert_called_once_with( + languages=["german"], + data_type=["nouns"], + output_dir="./custom_output_test", + overwrite=False, + ) - args, kwargs = mock_get_data.mock_calls[0] - self.assertEqual(args, ("English", "nouns", "tests_output")) - self.assertFalse(kwargs.get("all")) + # MARK: Test for overwrite=True + @patch("scribe_data.cli.get.query_data") + def test_get_data_with_overwrite_true(self, mock_query_data): + get_data(language="English", data_type="verbs", overwrite=True) + mock_query_data.assert_called_once_with( + languages=["English"], + data_type=["verbs"], + output_dir="scribe_data_json_export", + overwrite=True, + ) - args, kwargs = mock_get_data.mock_calls[-1] # Get the last call - self.assertIsNone(args) - self.assertTrue(kwargs["all"]) + # MARK: Test for overwrite=False + @patch("scribe_data.cli.get.query_data") + def test_get_data_with_overwrite_false(self, mock_query_data): + get_data( + language="English", + data_type="verbs", + overwrite=False, + output_dir="./custom_output_test", + ) + mock_query_data.assert_called_once_with( + languages=["English"], + data_type=["verbs"], + output_dir="./custom_output_test", + overwrite=False, + ) From de9c931c42657bdc0d26271e988a47522bcf8f70 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Tue, 8 Oct 2024 00:23:06 +0200 Subject: [PATCH 2/4] Minor edits to marks to make them less verbose --- tests/cli/test_get.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tests/cli/test_get.py b/tests/cli/test_get.py index 9269fbac..bcaaed66 100644 --- a/tests/cli/test_get.py +++ b/tests/cli/test_get.py @@ -22,28 +22,33 @@ import unittest from unittest.mock import patch + from scribe_data.cli.get import get_data class TestGetData(unittest.TestCase): - # MARK: Test for subprocess patching + # MARK: Subprocess Patching + @patch("subprocess.run") def test_get_emoji_keywords(self, mock_subprocess_run): get_data(language="English", data_type="emoji-keywords") self.assertTrue(mock_subprocess_run.called) - # MARK: Test for invalid arguments + # MARK: Invalid Arguments + def test_invalid_arguments(self): with self.assertRaises(ValueError): get_data() - # MARK: Test for getting all data + # MARK: All Data + @patch("scribe_data.cli.get.query_data") def test_get_all_data(self, mock_query_data): get_data(all=True) mock_query_data.assert_called_once_with(None, None, None, False) - # MARK: Test for specific language and data type + # MARK: Language and Data Type + @patch("scribe_data.cli.get.query_data") def test_get_specific_language_and_data_type(self, mock_query_data): get_data(language="german", data_type="nouns", output_dir="./test_output") @@ -54,7 +59,8 @@ def test_get_specific_language_and_data_type(self, mock_query_data): overwrite=False, ) - # MARK: Test for capitalized language input + # MARK: Capitalized Language + @patch("scribe_data.cli.get.query_data") def test_get_data_with_capitalized_language(self, mock_query_data): get_data(language="German", data_type="nouns") @@ -65,7 +71,8 @@ def test_get_data_with_capitalized_language(self, mock_query_data): overwrite=False, ) - # MARK: Test for lowercase language input + # MARK: Lowercase Language + @patch("scribe_data.cli.get.query_data") def test_get_data_with_lowercase_language(self, mock_query_data): get_data(language="german", data_type="nouns") @@ -76,7 +83,8 @@ def test_get_data_with_lowercase_language(self, mock_query_data): overwrite=False, ) - # MARK: Test for different output directory + # MARK: Output Directory + @patch("scribe_data.cli.get.query_data") def test_get_data_with_different_output_directory(self, mock_query_data): get_data( @@ -89,7 +97,8 @@ def test_get_data_with_different_output_directory(self, mock_query_data): overwrite=False, ) - # MARK: Test for overwrite=True + # MARK: Overwrite is True + @patch("scribe_data.cli.get.query_data") def test_get_data_with_overwrite_true(self, mock_query_data): get_data(language="English", data_type="verbs", overwrite=True) @@ -100,7 +109,8 @@ def test_get_data_with_overwrite_true(self, mock_query_data): overwrite=True, ) - # MARK: Test for overwrite=False + # MARK: Overwrite is False + @patch("scribe_data.cli.get.query_data") def test_get_data_with_overwrite_false(self, mock_query_data): get_data( From abc843239a64f51ef49ee8bb5486bd70528fa45e Mon Sep 17 00:00:00 2001 From: axif Date: Tue, 8 Oct 2024 04:40:02 +0600 Subject: [PATCH 3/4] windows compactable --- src/scribe_data/wikidata/query_data.py | 36 +++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py index afd12e41..fb79eb78 100644 --- a/src/scribe_data/wikidata/query_data.py +++ b/src/scribe_data/wikidata/query_data.py @@ -24,6 +24,8 @@ import os from pathlib import Path from urllib.error import HTTPError +import sys +import subprocess from tqdm.auto import tqdm @@ -262,7 +264,39 @@ def query_data( / target_type / f"format_{target_type}.py" ) - os.system(f"python3 {formatting_file_path} --file-path {output_dir}") + # Replace the original os.system call with: + execute_formatting_script(formatting_file_path, output_dir) + + # os.system(f"python3 {formatting_file_path} --file-path {output_dir}") + + +def execute_formatting_script(formatting_file_path, output_dir): + # Determine the root directory of the project + project_root = Path(__file__).parent.parent + + if sys.platform.startswith("win"): + python_executable = sys.executable + pythonpath = str(project_root) + + # Create environment with updated PYTHONPATH + env = os.environ.copy() + if "PYTHONPATH" in env: + env["PYTHONPATH"] = f"{pythonpath};{env['PYTHONPATH']}" + else: + env["PYTHONPATH"] = pythonpath + + # Use subprocess.run instead of os.system + subprocess.run( + [python_executable, str(formatting_file_path), "--file-path", output_dir], + env=env, + check=True, + ) + else: + # Unix-like systems (Linux, macOS) + subprocess.run( + ["python3", str(formatting_file_path), "--file-path", output_dir], + check=True, + ) if __name__ == "__main__": From 77ba1effe54bf682124261d643bf68296df6947e Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Tue, 8 Oct 2024 01:07:22 +0200 Subject: [PATCH 4/4] Order import and add docstring to formatting script run fxn --- src/scribe_data/wikidata/query_data.py | 86 +++++++++++++++----------- 1 file changed, 51 insertions(+), 35 deletions(-) diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py index fb79eb78..3ec6657a 100644 --- a/src/scribe_data/wikidata/query_data.py +++ b/src/scribe_data/wikidata/query_data.py @@ -22,10 +22,10 @@ import json import os +import subprocess +import sys from pathlib import Path from urllib.error import HTTPError -import sys -import subprocess from tqdm.auto import tqdm @@ -35,6 +35,52 @@ from scribe_data.wikidata.wikidata_utils import sparql +def execute_formatting_script(formatting_file_path, output_dir): + """ + Executes a formatting script given a filepath and output directory for the process. + + Parameters + ---------- + formatting_file_path : str + The formatting file to run. + + output_dir : str + The output directory path for results. + + Returns + ------- + The results of the formatting script saved in the given output directory. + """ + # Determine the root directory of the project. + project_root = Path(__file__).parent.parent + + if sys.platform.startswith("win"): + python_executable = sys.executable + pythonpath = str(project_root) + + # Create environment with updated PYTHONPATH. + env = os.environ.copy() + if "PYTHONPATH" in env: + env["PYTHONPATH"] = f"{pythonpath};{env['PYTHONPATH']}" + + else: + env["PYTHONPATH"] = pythonpath + + # Use subprocess.run instead of os.system. + subprocess.run( + [python_executable, str(formatting_file_path), "--file-path", output_dir], + env=env, + check=True, + ) + + else: + # Unix-like systems (Linux, macOS). + subprocess.run( + ["python3", str(formatting_file_path), "--file-path", output_dir], + check=True, + ) + + def query_data( languages: str = None, data_type: str = None, @@ -264,39 +310,9 @@ def query_data( / target_type / f"format_{target_type}.py" ) - # Replace the original os.system call with: - execute_formatting_script(formatting_file_path, output_dir) - - # os.system(f"python3 {formatting_file_path} --file-path {output_dir}") - - -def execute_formatting_script(formatting_file_path, output_dir): - # Determine the root directory of the project - project_root = Path(__file__).parent.parent - - if sys.platform.startswith("win"): - python_executable = sys.executable - pythonpath = str(project_root) - - # Create environment with updated PYTHONPATH - env = os.environ.copy() - if "PYTHONPATH" in env: - env["PYTHONPATH"] = f"{pythonpath};{env['PYTHONPATH']}" - else: - env["PYTHONPATH"] = pythonpath - - # Use subprocess.run instead of os.system - subprocess.run( - [python_executable, str(formatting_file_path), "--file-path", output_dir], - env=env, - check=True, - ) - else: - # Unix-like systems (Linux, macOS) - subprocess.run( - ["python3", str(formatting_file_path), "--file-path", output_dir], - check=True, - ) + execute_formatting_script( + formatting_file_path=formatting_file_path, output_dir=output_dir + ) if __name__ == "__main__":