Merge pull request #263 from SethiShreya/cli

Fixed custom user directory issue on get method issue #260
scribe-org · Oct 7, 2024 · bf187a4 · bf187a4
2 parents b7a0f82 + 3613d2f
commit bf187a4
Show file tree

Hide file tree

Showing 38 changed files with 551 additions and 203 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,5 @@
-# OS Files
-##########
+# MARK: OS Files
+
 .DS_Store
 .vscode/*
 !.vscode/extensions.json
@@ -8,8 +8,8 @@
 *wiki_partitions
 *wiki.ndjson
 
-# Python Files
-##############
+# MARK: Python Files
+
 # setup.py working directory
 build
 # setup.py dist directory
@@ -24,11 +24,15 @@ __pycache__
 venv
 .venv
 
-# NPM Files
-###########
+# MARK: NPM Files
+
 node_modules
 package-lock.json
 
-# Intermerdiary Data Files
-##########################
+# MARK: Intermediary Files
+
 **/*_queried.json
+
+# MARK: Test Files
+
+tests_output
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -46,6 +46,7 @@ Emojis for the following are chosen based on [gitmoji](https://gitmoji.dev/).
 - Many files were renamed including `update_data.py` being renamed `query_data.py`
 - Paths within the package have been updated to work for all operating systems via `pathlib` ([#125](https://github.com/scribe-org/Scribe-Data/issues/125)).
 - The language formatting scripts have been dramatically simplified given changes to export paths all being the same.
+- The `update_files` directory was removed in preparation for other means of showing data totals.
 
 ## Scribe-Data 3.3.0
 

diff --git a/docs/source/scribe_data/cli.rst b/docs/source/scribe_data/cli.rst
@@ -105,7 +105,7 @@ Behavior and Output:
 
     .. code-block:: text
 
-        Updating data for language: English, data type: ['verbs']
+        Updating data for language(s): English; data type(s): verbs
         Data updated:   0%|
 
 2. If existing files are found, you'll be prompted to choose an option:

diff --git a/src/scribe_data/cli/convert.py b/src/scribe_data/cli/convert.py
@@ -24,7 +24,6 @@
 import json
 import shutil
 from pathlib import Path
-from typing import Optional
 
 from scribe_data.cli.cli_utils import language_map
 from scribe_data.load.data_to_sqlite import data_to_sqlite
@@ -33,10 +32,33 @@
     get_language_iso,
 )
 
+# MARK: JSON
+
 
 def export_json(
     language: str, data_type: str, output_dir: Path, overwrite: bool
 ) -> None:
+    """
+    Export a JSON file from the CLI process.
+
+    Parameters
+    ----------
+        language : str
+            The language of the file to convert.
+
+        data_type : str
+            The data type of the file to convert.
+
+        output_dir : Path
+            The output directory path for results.
+
+        overwrite : bool
+            Whether to overwrite existing files.
+
+    Returns
+    -------
+        A JSON file saved in the given location.
+    """
     normalized_language = language_map.get(language.lower())
 
     if not normalized_language:
@@ -85,9 +107,40 @@ def export_json(
     )
 
 
+# MARK: CSV or TSV
+
+
 def convert_to_csv_or_tsv(
-    language: str, data_type: list, output_dir: Path, overwrite: bool, output_type: str
+    language: str,
+    data_type: list,
+    output_dir: Path,
+    overwrite: bool,
+    output_type: str,
 ) -> None:
+    """
+    Converts a Scribe-Data output file to a CSV or TSV file.
+
+    Parameters
+    ----------
+        output_type : str
+            The file type to convert to (CSV or TSV).
+
+        language : str
+            The language of the file to convert.
+
+        data_type : list
+            The data type(s) of the file to convert.
+
+        output_dir : Path
+            The output directory path for results.
+
+        overwrite : bool
+            Whether to overwrite existing files.
+
+    Returns
+    -------
+        A CSV or TSV file saved in the given location.
+    """
     normalized_language = language_map.get(language.lower())
     if not normalized_language:
         print(f"Language '{language}' is not recognized.")
@@ -154,12 +207,36 @@ def convert_to_csv_or_tsv(
         print(f"Data for '{dtype}' written to '{output_file}'")
 
 
+# MARK: SQLITE
+
+
 def convert_to_sqlite(
-    language: Optional[str] = None,
-    data_type: Optional[str] = None,
-    output_dir: Optional[str] = None,
-    overwrite: bool = False,
+    language: str,
+    data_type: str,
+    output_dir: Path,
+    overwrite: bool,
 ) -> None:
+    """
+    Converts a Scribe-Data output file to an SQLite file.
+
+    Parameters
+    ----------
+        language : str
+            The language of the file to convert.
+
+        data_type : str
+            The data type of the file to convert.
+
+        output_dir : Path
+            The output directory path for results.
+
+        overwrite : bool
+            Whether to overwrite existing files.
+
+    Returns
+    -------
+        A SQLite file saved in the given location.
+    """
     if not language:
         raise ValueError("Language must be specified for SQLite conversion.")
 
@@ -191,3 +268,52 @@ def convert_to_sqlite(
 
     else:
         print("No output directory specified. SQLite file remains in default location.")
+
+
+# MARK: Convert
+
+
+def convert(
+    language: str, data_type: str, output_dir: str, overwrite: bool, output_type: str
+):
+    """
+    Converts a Scribe-Data output file to a different file type.
+
+    Parameters
+    ----------
+        output_type : str
+            The file type to convert to (JSON, CSV or TSV).
+
+        language : str
+            The language of the file to convert.
+
+        data_type : str
+            The data type of the file to convert.
+
+        output_dir : str
+            The output directory path for results.
+
+        overwrite : bool
+            Whether to overwrite existing files.
+
+    Returns
+    -------
+        A file of the requested output type saved in the given location.
+    """
+    if output_dir:
+        output_dir = Path(output_dir).resolve()
+        if not output_dir.exists():
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+        if output_type == "json" or output_type is None:
+            export_json(language, data_type, output_dir, overwrite)
+
+        elif output_type in {"csv", "tsv"}:
+            convert_to_csv_or_tsv(
+                language, data_type, output_dir, overwrite, output_type
+            )
+
+        else:
+            raise ValueError(
+                "Unsupported output type. Please use 'json', 'csv', or 'tsv'."
+            )
diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
@@ -22,37 +22,82 @@
 
 import subprocess
 from pathlib import Path
-from typing import Optional
 
-from scribe_data.cli.convert import convert_to_csv_or_tsv, export_json
-from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
+from scribe_data.utils import (
+    DEFAULT_CSV_EXPORT_DIR,
+    DEFAULT_JSON_EXPORT_DIR,
+    DEFAULT_SQLITE_EXPORT_DIR,
+    DEFAULT_TSV_EXPORT_DIR,
+)
 from scribe_data.wikidata.query_data import query_data
 
-DATA_DIR = Path(DEFAULT_JSON_EXPORT_DIR)
-
 
 def get_data(
-    language: Optional[str] = None,
-    data_type: Optional[str] = None,
-    output_dir: Optional[str] = None,
+    language: str = None,
+    data_type: str = None,
+    output_type: str = None,
+    output_dir: str = None,
     overwrite: bool = False,
-    output_type: Optional[str] = None,
     outputs_per_entry: int = None,
     all: bool = False,
 ) -> None:
     """
     Function for controlling the data get process for the CLI.
+
+    Parameters
+    ----------
+        language : str
+            The language(s) to get.
+
+        data_type : str
+            The data type(s) to get.
+
+        output_type : str
+            The output file type.
+
+        output_dir : str
+            The output directory path for results.
+
+        outputs_per_entry : int
+            How many outputs should be generated per data entry.
+
+        overwrite : bool
+            Whether to overwrite existing files (default: False).
+
+        all : bool
+            Get all languages and data types.
+
+    Returns
+    -------
+        The requested data saved locally given file type and location arguments.
     """
+    # MARK: Defaults
+
+    output_type = output_type or "json"
+    if output_dir is None:
+        if output_type == "csv":
+            output_dir = DEFAULT_CSV_EXPORT_DIR
+        elif output_type == "json":
+            output_dir = DEFAULT_JSON_EXPORT_DIR
+        elif output_type == "sqlite":
+            output_dir = DEFAULT_SQLITE_EXPORT_DIR
+        elif output_type == "tsv":
+            output_dir = DEFAULT_TSV_EXPORT_DIR
+
     languages = [language] if language else None
 
     subprocess_result = False
 
+    # MARK: Get All
+
     if all:
         print("Updating all languages and data types ...")
         query_data(None, None, overwrite)
         subprocess_result = True
 
-    elif data_type in ["emoji-keywords", "emoji_keywords"]:
+    # MARK: Emojis
+
+    elif data_type in {"emoji-keywords", "emoji_keywords"}:
         for lang in languages:
             emoji_keyword_extraction_script = (
                 Path(__file__).parent.parent
@@ -66,6 +111,8 @@ def get_data(
                 ["python", emoji_keyword_extraction_script]
             )
 
+    # MARK: Translations
+
     elif data_type == "translations":
         for lang in languages:
             translation_generation_script = (
@@ -80,47 +127,38 @@ def get_data(
                 ["python", translation_generation_script]
             )
 
+    # MARK: Query Data
+
     elif language or data_type:
         data_type = data_type[0] if isinstance(data_type, list) else data_type
 
         data_type = [data_type] if data_type else None
-        print(f"Updating data for language: {language}, data type: {data_type}")
-        query_data(languages, data_type, overwrite)
+        print(
+            f"Updating data for language(s): {language}; data type(s): {', '.join(data_type)}"
+        )
+        query_data(
+            languages=languages,
+            data_type=data_type,
+            output_dir=output_dir,
+            overwrite=overwrite,
+        )
         subprocess_result = True
 
     else:
         raise ValueError(
             "You must provide at least one of the --language (-l) or --data-type (-dt) options, or use --all (-a)."
         )
 
-    if output_dir:
-        output_dir = Path(output_dir).resolve()
-        if not output_dir.exists():
-            output_dir.mkdir(parents=True, exist_ok=True)
-
-        if output_type == "json" or output_type is None:
-            export_json(language, data_type, output_dir, overwrite)
-
-        elif output_type in ["csv", "tsv"]:
-            convert_to_csv_or_tsv(
-                language, data_type, output_dir, overwrite, output_type
-            )
-
-        else:
-            raise ValueError(
-                "Unsupported output type. Please use 'json', 'csv', or 'tsv'."
-            )
-
-    elif (
+    if (
         isinstance(subprocess_result, subprocess.CompletedProcess)
         and subprocess_result.returncode != 1
     ) or (isinstance(subprocess_result, bool) and subprocess_result is not False):
         print(
-            "No output directory specified for exporting results.",
-            f"Updated data was saved in: {Path(DEFAULT_JSON_EXPORT_DIR).resolve()}.",
+            f"Updated data was saved in: {Path(output_dir).resolve()}.",
         )
 
-    elif data_type in ["emoji-keywords", "emoji_keywords"]:
+    # The emoji keywords process has failed.
+    elif data_type in {"emoji-keywords", "emoji_keywords"}:
         print(
             "\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
         )