diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7fe70d7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.json filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/add_dunder_methods.yaml b/.github/workflows/add_dunder_methods.yaml new file mode 100644 index 0000000..c56c931 --- /dev/null +++ b/.github/workflows/add_dunder_methods.yaml @@ -0,0 +1,37 @@ +name: add dunder methods to genome_annotation model + +on: + push: + paths: + - 'bkbit/models/genome_annotation.py' + +permissions: + contents: write + +jobs: + run-script: + runs-on: ubuntu-latest + steps: + - name: Checkout this repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + + + - name: Run add_dunderMethods_genomeAnnotation + run: python bkbit/model_editors/add_dunderMethods_genomeAnnotation.py + + - name: Commit changes + run: | + git config --global user.name 'github-actions' + git config --global user.email 'github-actions@github.com' + git add bkbit/models/genome_annotation.py + git commit -m 'Update genome_annotation.py with dunder methods' + git push \ No newline at end of file diff --git a/bkbit/cli.py b/bkbit/cli.py index 8201ab7..3c8eff2 100644 --- a/bkbit/cli.py +++ b/bkbit/cli.py @@ -3,6 +3,8 @@ from bkbit.data_translators.library_generation_translator import specimen2jsonld from bkbit.model_converters.yaml2sheet_converter import yaml2cvs from bkbit.data_translators.file_manifest_translator import filemanifest2jsonld +from bkbit.data_translators.genome_annotation_translator import gff2jsonld +from bkbit.utils.get_ncbi_taxonomy import download_ncbi_taxonomy @click.group() def cli(): @@ -14,6 +16,8 @@ def cli(): cli.add_command(specimen2jsonld) cli.add_command(yaml2cvs) cli.add_command(filemanifest2jsonld) +cli.add_command(gff2jsonld) +cli.add_command(download_ncbi_taxonomy) if __name__ == '__main__': cli() diff --git a/bkbit/data_translators/README.md b/bkbit/data_translators/README.md index a3178fb..a4f1987 100644 --- a/bkbit/data_translators/README.md +++ b/bkbit/data_translators/README.md @@ -95,4 +95,111 @@ ls . DO-XIQQ6047.jsonld DO-WFFF3774.jsonld DO-RMRL6873.jsonld +# genome_annotation_translator.py + +## Overview +genome_annotation_translator uses annotated genome data in GFF3 format to generate respective data objects representing genes, genome assemblies, and organisms. All data object are defined in the [Genome Annotation Schema](https://brain-bican.github.io/models/index_genome_annotation/).
+Each jsonld file will contain: +- GeneAnnotation objects +- 1 GenomeAnnotation object +- 1 GenomeAssembly object +- 1 OrganismTaxon object +- 1 Checksum object + + + +## Command Line +### gen-geneannotation +```bash +gen-geneannotation [OPTIONS] GFF3_URL +``` + +#### Options +-a, --assembly_accession
+ ID assigned to the genomic assembly used in the GFF3 file.
+ *Note*: Must be provided when using ENSEMBL GFF3 files. + +-s, --assembly_strain
+ Specific strain of the organism associated with the GFF3 file. + +-l, --log_level
+ Logging level.
+ DEFAULT:
+  'WARNING'
+ OPTIONS:
+  DEBUG | INFO | WARNING | ERROR | CRITICAL + +-f, --log_to_file
+ Log to a file instead of the console.
+ DEFAULT:
+  False
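The options above map directly onto the `Gff3` class added in this PR, so the same translation can also be driven from Python rather than the CLI. A minimal sketch (assuming `bkbit` is installed and the NCBI taxonomy lookup files have already been generated with the `download_ncbi_taxonomy` command, which the `Gff3` constructor checks for):

```python
from bkbit.data_translators.genome_annotation_translator import Gff3

# NCBI URL from Example 1 below; for Ensembl URLs, assembly_accession must also be passed.
url = (
    "https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9823/106/"
    "GCF_000003025.6_Sscrofa11.1/GCF_000003025.6_Sscrofa11.1_genomic.gff.gz"
)

gff3 = Gff3(url, log_level="INFO")  # downloads the GFF3 file and builds taxon/assembly/annotation objects
gff3.parse()                        # keeps gene, pseudogene, and ncRNA_gene features by default
gff3.serialize_to_jsonld()          # prints the JSON-LD document to stdout
```

Note that `serialize_to_jsonld()` writes to stdout, which is why the CLI examples below redirect into `output.jsonld`.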
+ +## Examples +#### Example 1: NCBI GFF3 File + +```bash +pip install bkbit + +gen-geneannotation 'https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9823/106/GCF_000003025.6_Sscrofa11.1/GCF_000003025.6_Sscrofa11.1_genomic.gff.gz' > output.jsonld +``` + +#### Example 2: ENSEMBL GFF3 File + +```bash +pip install bkbit + +gen-geneannotation -a 'GCF_003339765.1' 'https://ftp.ensembl.org/pub/release-104/gff3/macaca_mulatta/Macaca_mulatta.Mmul_10.104.gff3.gz' > output.jsonld +```
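Either way, the translator emits a single JSON-LD document whose `@graph` array holds every object listed in the overview (the `@context`/`@graph` layout comes from `serialize_to_jsonld`). A short, illustrative sketch of inspecting the redirected output; every serialized object carries an `id`, so tallying identifier prefixes gives a quick summary:

```python
import json
from collections import Counter

# Load the document written by one of the CLI examples above.
with open("output.jsonld", encoding="utf-8") as f:
    doc = json.load(f)

graph = doc["@graph"]
print(f"{len(graph)} objects in @graph")

# Tally objects by identifier prefix (e.g. NCBITaxon for the organism, urn for checksums).
print(Counter(obj["id"].split(":")[0] for obj in graph if "id" in obj))
```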
\ No newline at end of file diff --git a/bkbit/data_translators/genome_annotation_translator.py index 625c237..0615853 100644 --- a/bkbit/data_translators/genome_annotation_translator.py +++ b/bkbit/data_translators/genome_annotation_translator.py @@ -1,71 +1,77 @@ +""" +Module for downloading, parsing, and processing GFF3 files from NCBI and Ensembl repositories. This module provides functionality to: + +1. Download a GFF3 file from a specified URL and calculate its checksums. +2. Parse the GFF3 file to extract gene annotations. +3. Generate various metadata objects such as organism taxon, genome assembly, and genome annotation. +4. Serialize the extracted information into JSON-LD format for further use. + +Classes: + Gff3: A class to handle the entire process of downloading, parsing, and processing GFF3 files. + +Functions: + gff2jsonld: Click command-line entry point that runs the full translation and prints the JSON-LD output. + +Usage: + The module can be run as a standalone script by executing it with appropriate arguments and options: + + ``` + python genome_annotation_translator.py CONTENT_URL -a ASSEMBLY_ACCESSION -s ASSEMBLY_STRAIN -l LOG_LEVEL -f + ``` + The script will download the GFF3 file from the specified URL, parse it, and serialize the extracted information into JSON-LD format.
+ +Example: + ``` + python genome_annotation_translator.py "https://example.com/path/to/gff3.gz" -a "GCF_000001405.39" -s "strain_name" -l "INFO" -f True + ``` + +Dependencies: + - re + - hashlib + - tempfile + - uuid + - urllib + - urllib.request + - urllib.parse + - os + - json + - datetime + - collections.defaultdict + - subprocess + - gzip + - tqdm + - click + - pkg_resources + - bkbit.models.genome_annotation as ga + - bkbit.utils.setup_logger as setup_logger + - bkbit.utils.load_json as load_json +""" + import re import hashlib import tempfile import uuid -import logging import urllib import urllib.request +from urllib.parse import urlparse import os import json from datetime import datetime from collections import defaultdict import subprocess import gzip +import sys from tqdm import tqdm +import click +import pkg_resources from bkbit.models import genome_annotation as ga +from bkbit.utils.setup_logger import setup_logger +from bkbit.utils.load_json import load_json -logging.basicConfig( - filename="gff3_translator_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + ".log", - format="%(levelname)s: %(message)s (%(asctime)s)", - datefmt="%m/%d/%Y %I:%M:%S %p", - level=logging.INFO, -) -logger = logging.getLogger(__name__) - ## CONSTANTS ## -TAXON_SCIENTIFIC_NAME = { - "9606": "Homo sapiens", - "10090": "Mus musculus", - "9544": "Macaca mulatta", - "9483": "Callithrix jacchus", - "60711": "Chlorocebus sabaeus", - "9361": "Dasypus novemcinctus", - "9685": "Felis catus", - "9669": "Mustela putorius furo", - "30611": "Otolemur garnettii", - "9593": "Gorilla gorilla", - "13616": "Monodelphis domestica", - "9823": "Sus scrofa", - "9986": "Oryctolagus cuniculus", - "10116": "Rattus norvegicus", - "27679": "Saimiri boliviensis", - "246437": "Tupaia belangeri chinensis", - "9407": "Rousettus aegyptiacus", - "9598": "Pan troglodytes" -} -TAXON_COMMON_NAME = { - "9606": "human", - "10090": "mouse", - "9544": "rhesus macaque", - "9483": "common marmoset", - "60711": "green monkey", - "9361": "nine-banded armadillo", - "9685": "cat", - "9669": "ferret", - "30611": "galago", - "9593": "gorilla", - "13616":"gray short-tailed opossum", - "9823": "pig", - "9986": "rabbit", - "10116": "rat", - "27679": "squirrel monkey", - "246437": "Chinese tree shrew", - "9407": "egyptian fruit bat", - "9598": "chimpanzee" -} PREFIX_MAP = { "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_", "NCBIGene": "http://identifiers.org/ncbigene/", @@ -81,72 +87,273 @@ "{authority} {taxon_scientific_name} Annotation Release {genome_version}" ) DEFAULT_FEATURE_FILTER = ("gene", "pseudogene", "ncRNA_gene") -DEFAULT_HASH = ("SHA256", "MD5") - +DEFAULT_HASH = ("MD5",) +LOG_FILE_NAME = ( + "gff3_translator_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + ".log" +) +TAXON_DIR_PATH = "../utils/ncbi_taxonomy/" +SCIENTIFIC_NAME_TO_TAXONID_PATH = pkg_resources.resource_filename(__name__, TAXON_DIR_PATH + "scientific_name_to_taxid.json") +TAXON_SCIENTIFIC_NAME_PATH = pkg_resources.resource_filename(__name__, TAXON_DIR_PATH + "taxid_to_scientific_name.json") +TAXON_COMMON_NAME_PATH = pkg_resources.resource_filename(__name__, TAXON_DIR_PATH + "taxid_to_common_name.json") class Gff3: + """ + A class to handle the downloading, parsing, and processing of GFF3 files from NCBI and Ensembl repositories. + + Attributes: + content_url (str): The URL of the GFF file. + assembly_accession (str): The ID of the genome assembly. + assembly_strain (str, optional): The strain of the genome assembly. Defaults to None. 
+ log_level (str): The logging level. Defaults to 'WARNING'. + log_to_file (bool): Flag to log messages to a file. Defaults to False. + + Methods: + __init__(content_url, assembly_accession=None, assembly_strain=None, log_level="WARNING", log_to_file=False): + Initializes the Gff3 class with the provided parameters. + + parse_url(): + Parses the content URL and extracts information about the genome annotation. + + __download_gff_file(): + Downloads a GFF file from a given URL and calculates the MD5, SHA256, and SHA1 hashes. + + generate_organism_taxon(taxon_id): + Generates an organism taxon object based on the provided taxon ID. + + assign_authority_type(authority): + Assigns the authority type based on the given authority string. + + generate_genome_assembly(assembly_id, assembly_version, assembly_label, assembly_strain=None): + Generates a genome assembly object based on the provided parameters. + + generate_genome_annotation(genome_label, genome_version): + Generates a genome annotation object based on the provided parameters. + + generate_digest(hash_values, hash_functions=DEFAULT_HASH): + Generates checksum digests for the GFF file using the specified hash functions. + + __get_line_count(file_path): + Returns the line count of a file. + + parse(feature_filter=DEFAULT_FEATURE_FILTER): + Parses the GFF file and extracts gene annotations based on the provided feature filter. + + generate_ensembl_gene_annotation(attributes, curr_line_num): + Generates a GeneAnnotation object for Ensembl based on the provided attributes. + + generate_ncbi_gene_annotation(attributes, curr_line_num): + Generates a GeneAnnotation object for NCBI based on the provided attributes. + + __get_attribute(attributes, attribute_name, curr_line_num): + Retrieves the value of a specific attribute from the given attributes dictionary. + + __resolve_ncbi_gene_annotation(new_gene_annotation, curr_line_num): + Resolves conflicts between existing and new gene annotations based on certain conditions. + + __merge_values(t): + Merges values from a list of lists into a dictionary of sets. + + serialize_to_jsonld(exclude_none=True, exclude_unset=False): + Serializes the object and either writes it to the specified output file or prints it to the CLI. + """ + def __init__( self, content_url, - taxon_id, - assembly_id, - assembly_version, - assembly_label, - genome_label: str, - genome_version: str, - genome_authority: str, - hash_functions: tuple[str] = DEFAULT_HASH, + assembly_accession=None, assembly_strain=None, - gff_file=None, + log_level="WARNING", + log_to_file=False, ): """ Initializes an instance of the GFFTranslator class. Parameters: - content_url (str): The URL of the GFF file. - - taxon_id (int): The taxon ID of the organism. - assembly_id (str): The ID of the genome assembly. - - assembly_version (str): The version of the genome assembly. - - assembly_label (str): The label of the genome assembly. - - genome_label (str): The label of the genome. - - genome_version (str): The version of the genome. - - genome_authority (str): The authority responsible for the genome. - - hash_functions (tuple[str]): A list of hash functions to use for generating checksums. Defaults to ("SHA256", "MD5"). - assembly_strain (str, optional): The strain of the genome assembly. Defaults to None. - - gff_file (str, optional): The local path to the GFF file if file is already downloaded. Defaults to None. + - hash_functions (tuple[str]): A tuple of hash functions to use for generating checksums. Defaults to ('MD5'). 
""" - self.logger = logger + self.logger = setup_logger(LOG_FILE_NAME, log_level, log_to_file) + try: + self.scientific_name_to_taxonid = load_json(SCIENTIFIC_NAME_TO_TAXONID_PATH) + self.taxon_scientific_name = load_json(TAXON_SCIENTIFIC_NAME_PATH) + self.taxon_common_name = load_json(TAXON_COMMON_NAME_PATH) + except FileNotFoundError as e: + self.logger.critical("NCBI Taxonomy not downloaded. Run 'bkbit download_ncbi_taxonomy' command first." ) + print(e) + sys.exit(2) + self.content_url = content_url - if gff_file is None: - self.gff_file = self.__download_gff_file() - else: - self.gff_file = gff_file - self.authority = self.assign_authority_type(genome_authority) + + ## STEP 1: Parse the content URL to get metadata + # Parse content_url to get metadata + url_metadata = self.parse_url() + if url_metadata is None: + self.logger.critical( + "The provided content URL is not supported. Please provide a valid URL." + ) + raise ValueError( + "The provided content URL is not supported. Please provide a valid URL." + ) + + # Define variables to store metadata + ( + taxon_id, + assembly_id, + assembly_version, + assembly_label, + genome_label, + genome_version, + ) = (None, None, None, None, None, None) + + # Assign the authority type + self.authority = url_metadata.get("authority") + + # Assign the taxon_id and assembly_id based on the authority + if self.authority.value == ga.AuthorityType.NCBI.value: + taxon_id = url_metadata.get("taxonid") + assembly_id = url_metadata.get("assembly_accession") + elif self.authority.value == ga.AuthorityType.ENSEMBL.value: + taxon_id = self.scientific_name_to_taxonid.get( + url_metadata.get("scientific_name").replace("_", " ") + ) + if assembly_accession is None: + self.logger.critical( + "The assembly ID is required for Ensembl URLs. Please provide the assembly ID." + ) + raise ValueError( + "The assembly ID is required for Ensembl URLs. Please provide the assembly ID." + ) + assembly_id = assembly_accession + + # Assign assembly_version, assembly_label, genome_version, and genome_label + assembly_version = ( + assembly_id.split(".")[1] if len(assembly_id.split(".")) >= 1 else None + ) + assembly_label = url_metadata.get("assembly_name") + genome_version = url_metadata.get("release_version") + genome_label = self.authority.value + "-" + taxon_id + "-" + genome_version + + ## STEP 2: Download the GFF file + # Download the GFF file + self.gff_file, hash_values = self.__download_gff_file() + + ## STEP 3: Generate the organism taxon, genome assembly, checksums, and genome annotation objects + # Generate the organism taxon object self.organism_taxon = self.generate_organism_taxon(taxon_id) self.genome_assembly = self.generate_genome_assembly( assembly_id, assembly_version, assembly_label, assembly_strain ) - self.checksums = self.generate_digest(hash_functions) + self.checksums = self.generate_digest(hash_values, DEFAULT_HASH) self.genome_annotation = self.generate_genome_annotation( genome_label, genome_version ) + self.gene_annotations = {} + def parse_url(self): + """ + Parses the content URL and extracts information about the genome annotation. + + Returns: + A dictionary containing the following information: + - 'authority': The authority type (NCBI or ENSEMBL). + - 'taxonid': The taxon ID of the genome. + - 'release_version': The release version of the genome annotation. + - 'assembly_accession': The assembly accession of the genome. + - 'assembly_name': The name of the assembly. + - 'species': The species name (only for ENSEMBL URLs). 
+ """ + # Define regex patterns for NCBI and Ensembl URLs + # NCBI : [assembly accession.version]_[assembly name]_[content type].[optional format] + # ENSEMBL : ..<_version>.gff3.gz -> organism full name, assembly name, genome version + ncbi_pattern = r"/genomes/all/annotation_releases/(\d+)(?:/(\d+))?/(GCF_\d+\.\d+)[_-]([^/]+)/(GCF_\d+\.\d+)[_-]([^/]+)_genomic\.gff\.gz" + ensembl_pattern = ( + r"/pub/release-(\d+)/gff3/([^/]+)/([^/.]+)\.([^/.]+)\.([^/.]+)\.gff3\.gz" + ) + + # Parse the URL to get the path + parsed_url = urlparse(self.content_url) + path = parsed_url.path + + # Determine if the URL is from NCBI or Ensembl and extract information + if "ncbi" in parsed_url.netloc: + ncbi_match = re.search(ncbi_pattern, path) + if ncbi_match: + return { + "authority": ga.AuthorityType.NCBI, + "taxonid": ncbi_match.group(1), + "release_version": ( + ncbi_match.group(2) + if ncbi_match.group(2) + else ncbi_match.group(4) + ), + "assembly_accession": ncbi_match.group(3), + "assembly_name": ncbi_match.group(6), + } + + elif "ensembl" in parsed_url.netloc: + ensembl_match = re.search(ensembl_pattern, path) + if ensembl_match: + return { + "authority": ga.AuthorityType.ENSEMBL, + "release_version": ensembl_match.group(1), + "scientific_name": ensembl_match.group(3), + "assembly_name": ensembl_match.group(4), + } + + # If no match is found, return None + return None + def __download_gff_file(self): """ - Downloads a GFF file from the specified content URL, decompresses it, and returns the path to the temporary file. + Downloads a GFF file from a given URL and calculates the MD5, SHA256, and SHA1 hashes. Returns: - str: The path to the temporary file containing the decompressed GFF data. + tuple: A tuple containing the path to the downloaded gzip file and a dictionary + with the MD5, SHA256, and SHA1 hashes of the file. """ - with urllib.request.urlopen(self.content_url) as response: - gzip_data = response.read() + response = urllib.request.urlopen(self.content_url) + total_size = int(response.headers.get("content-length", 0)) + block_size = 1024 # 1 Kilobyte + + # Create hash objects + md5_hash = hashlib.md5() + sha256_hash = hashlib.sha256() + sha1_hash = hashlib.sha1() # Create a temporary file for the gzip data with tempfile.NamedTemporaryFile(suffix=".gz", delete=False) as f_gzip: - f_gzip.write(gzip_data) gzip_file_path = f_gzip.name - return gzip_file_path + + # Create a progress bar + progress_bar = tqdm( + total=total_size, + unit="iB", + unit_scale=True, + desc="Downloading GFF file", + ) + + # Read the file in chunks, write to the temporary file, and update the hash + while True: + data = response.read(block_size) + if not data: + break + f_gzip.write(data) + md5_hash.update(data) + sha256_hash.update(data) + sha1_hash.update(data) + progress_bar.update(len(data)) + + progress_bar.close() + + # Return the path to the temporary file and the md5 hash + return gzip_file_path, { + "MD5": md5_hash.hexdigest(), + "SHA256": sha256_hash.hexdigest(), + "SHA1": sha1_hash.hexdigest(), + } def generate_organism_taxon(self, taxon_id: str): """ @@ -158,11 +365,10 @@ def generate_organism_taxon(self, taxon_id: str): Returns: ga.OrganismTaxon: The generated organism taxon object. 
""" - self.logger.debug("Generating organism taxon") return ga.OrganismTaxon( id=TAXON_PREFIX + ":" + taxon_id, - full_name=TAXON_SCIENTIFIC_NAME[taxon_id], - name=TAXON_COMMON_NAME[taxon_id], + full_name=self.taxon_scientific_name[taxon_id], + name=self.taxon_common_name[taxon_id], iri=PREFIX_MAP[TAXON_PREFIX] + taxon_id, ) @@ -179,12 +385,11 @@ def assign_authority_type(self, authority: str): Raises: Exception: If the authority is not supported. Only NCBI and Ensembl authorities are supported. """ - self.logger.debug("Assigning authority type") if authority.upper() == ga.AuthorityType.NCBI.value: return ga.AuthorityType.NCBI if authority.upper() == ga.AuthorityType.ENSEMBL.value: return ga.AuthorityType.ENSEMBL - logger.critical( + self.logger.critical( "Authority %s is not supported. Please use NCBI or Ensembl.", authority ) raise ValueError( @@ -210,7 +415,6 @@ def generate_genome_assembly( Returns: ga.GenomeAssembly: The generated genome assembly object. """ - self.logger.debug("Generating genome assembly") return ga.GenomeAssembly( id=ASSEMBLY_PREFIX + ":" + assembly_id, in_taxon=[self.organism_taxon.id], @@ -231,7 +435,6 @@ def generate_genome_annotation(self, genome_label: str, genome_version: str): Returns: ga.GenomeAnnotation: The generated genome annotation. """ - self.logger.debug("Generating genome annotation") return ga.GenomeAnnotation( id=BICAN_ANNOTATION_PREFIX + genome_label.upper(), digest=[checksum.id for checksum in self.checksums], @@ -250,6 +453,7 @@ def generate_genome_annotation(self, genome_label: str, genome_version: str): def generate_digest( self, + hash_values: dict, hash_functions: tuple[str] = DEFAULT_HASH, ) -> list[ga.Checksum]: """ @@ -265,11 +469,7 @@ def generate_digest( ValueError: If an unsupported hash algorithm is provided. """ - gff_data = open( - self.gff_file, "rb" - ).read() # TODO: Modify this to read the file in chunks checksums = [] - for hash_type in hash_functions: # Generate a UUID version 4 uuid_value = uuid.uuid4() @@ -278,31 +478,32 @@ def generate_digest( urn = f"urn:uuid:{uuid_value}" hash_type = hash_type.strip().upper() # Create a Checksum object - if hash_type == "SHA256": - digest = hashlib.sha256(gff_data).hexdigest() + if hash_type == ga.DigestType.SHA256.name: checksums.append( ga.Checksum( id=urn, checksum_algorithm=ga.DigestType.SHA256, - value=digest, + value=hash_values.get("SHA256"), ) ) - elif hash_type == "MD5": - digest = hashlib.md5(gff_data).hexdigest() + elif hash_type == ga.DigestType.MD5.name: checksums.append( ga.Checksum( - id=urn, checksum_algorithm=ga.DigestType.MD5, value=digest + id=urn, + checksum_algorithm=ga.DigestType.MD5, + value=hash_values.get("MD5"), ) ) - elif hash_type == "SHA1": - digest = hashlib.sha1(gff_data).hexdigest() + elif hash_type == ga.DigestType.SHA1.name: checksums.append( ga.Checksum( - id=urn, checksum_algorithm=ga.DigestType.SHA1, value=digest + id=urn, + checksum_algorithm=ga.DigestType.SHA1, + value=hash_values.get("SHA1"), ) ) else: - logger.error( + self.logger.error( "Hash algorithm %s is not supported. Please use SHA256, MD5, or SHA1.", hash_type, ) @@ -321,9 +522,9 @@ def __get_line_count(self, file_path): result = subprocess.run( ["wc", "-l", file_path], stdout=subprocess.PIPE, check=True - ) # If check is True and the exit code was non-zero, it raises a CalledProcessError. - # The CalledProcessError object will have the return code in the returncode attribute, - # and output & stderr attributes if those streams were captured. 
+ ) # If check is True and the exit code was non-zero, it raises a CalledProcessError. + # The CalledProcessError object will have the return code in the returncode attribute, + # and output & stderr attributes if those streams were captured. output = result.stdout.decode().strip() line_count = int(output.split()[0]) # Extract the line count from the output return line_count @@ -362,9 +563,8 @@ def parse(self, feature_filter: tuple[str] = DEFAULT_FEATURE_FILTER): for line_raw in file: line_strip = line_raw.strip() if curr_line_num == 1 and not line_strip.startswith("##gff-version 3"): - logger.critical( - 'Line %s: ##gff-version 3" missing from the first line.', - curr_line_num, + self.logger.warning( + '"##gff-version 3" missing from the first line of the file. The given file may not be a valid GFF3 file.' ) elif len(line_strip) == 0: # blank line continue @@ -375,7 +575,7 @@ def parse(self, feature_filter: tuple[str] = DEFAULT_FEATURE_FILTER): else: # line may be a feature or unknown tokens = list(map(str.strip, line_raw.split("\t"))) if len(tokens) != 9: - logger.warning( + self.logger.warning( "Line %s: Features are expected 9 columns, found %s.", curr_line_num, len(tokens), @@ -387,19 +587,13 @@ def parse(self, feature_filter: tuple[str] = DEFAULT_FEATURE_FILTER): tuple(a.split("=") for a in tokens[8].split(";")) ) # TODO: Write cleaner code that calls respective generate function based on the authority automatically - if ( - self.genome_annotation.authority - == ga.AuthorityType.ENSEMBL - ): + if self.genome_annotation.authority == ga.AuthorityType.ENSEMBL: gene_annotation = self.generate_ensembl_gene_annotation( attributes, curr_line_num ) if gene_annotation is not None: self.gene_annotations[gene_annotation] = gene_annotation - elif ( - self.genome_annotation.authority - == ga.AuthorityType.NCBI - ): + elif self.genome_annotation.authority == ga.AuthorityType.NCBI: gene_annotation = self.generate_ncbi_gene_annotation( attributes, curr_line_num ) @@ -483,14 +677,14 @@ def generate_ncbi_gene_annotation(self, attributes, curr_line_num): if len(geneid_values) == 1: stable_id = geneid_values.pop() else: - logger.error( + self.logger.error( "Line %s: No GeneAnnotation object created for this row due to missing dbxref attribute.", curr_line_num, ) return None if not stable_id: - logger.error( + self.logger.error( "Line %s: No GeneAnnotation object created for this row due to number of GeneIDs provided in dbxref attribute is not equal to one.", curr_line_num, ) @@ -513,7 +707,7 @@ def generate_ncbi_gene_annotation(self, attributes, curr_line_num): ) synonyms.sort() # note: this is not required, but it makes the output more predictable therefore easier to test else: - logger.warning( + self.logger.debug( "Line %s: synonym is not set for this row's GeneAnnotation object due to missing gene_synonym attribute.", curr_line_num, ) @@ -536,12 +730,12 @@ def generate_ncbi_gene_annotation(self, attributes, curr_line_num): gene_annotation, curr_line_num ) if name != self.gene_annotations[gene_annotation.id].name: - logger.warning( + self.logger.debug( "Line %s: GeneAnnotation object with id %s already exists with a different name. 
Current name: %s, Existing name: %s", curr_line_num, stable_id, name, - self.gene_annotations[gene_annotation.id].name + self.gene_annotations[gene_annotation.id].name, ) return None return gene_annotation @@ -561,7 +755,7 @@ def __get_attribute(self, attributes, attribute_name, curr_line_num): value = None if attribute_name in attributes: if len(attributes[attribute_name]) != 1: - logger.warning( + self.logger.debug( "Line %s: %s not set for this row's GeneAnnotation object due to more than one %s provided.", curr_line_num, attribute_name, @@ -576,7 +770,7 @@ def __get_attribute(self, attributes, attribute_name, curr_line_num): else: value = attributes[attribute_name].pop() if value.find(",") != -1: - logger.warning( + self.logger.debug( 'Line %s: %s not set for this row\'s GeneAnnotation object due to value of %s attribute containing ",".', curr_line_num, attribute_name, @@ -584,7 +778,7 @@ def __get_attribute(self, attributes, attribute_name, curr_line_num): ) value = None else: - logger.warning( + self.logger.debug( "Line %s: %s not set for this row's GeneAnnotation object due to missing %s attribute.", curr_line_num, attribute_name, @@ -610,19 +804,14 @@ def __resolve_ncbi_gene_annotation(self, new_gene_annotation, curr_line_num): """ existing_gene_annotation = self.gene_annotations[new_gene_annotation.id] - if ( - existing_gene_annotation.description is None - and new_gene_annotation.description is not None - ): - return new_gene_annotation if ( existing_gene_annotation.description is not None and new_gene_annotation.description is None ): return None if ( - existing_gene_annotation.molecular_type is None - and new_gene_annotation.molecular_type is not None + existing_gene_annotation.description is None + and new_gene_annotation.description is not None ): return new_gene_annotation if ( @@ -631,17 +820,17 @@ def __resolve_ncbi_gene_annotation(self, new_gene_annotation, curr_line_num): ): return None if ( - existing_gene_annotation.molecular_type == ga.BioType.noncoding.value - and new_gene_annotation.molecular_type != ga.BioType.noncoding.value + existing_gene_annotation.molecular_type is None + and new_gene_annotation.molecular_type is not None ): return new_gene_annotation - if ( - existing_gene_annotation.molecular_type != ga.BioType.noncoding.value - and new_gene_annotation.molecular_type == ga.BioType.noncoding.value - ): + if existing_gene_annotation.molecular_type == ga.BioType.protein_coding.value: return None - logger.critical( - "Line %s: Unable to resolve duplicates for GeneID: %s.\nexisting gene: %s\nnew gene: %s", + if new_gene_annotation.molecular_type == ga.BioType.protein_coding.value: + return new_gene_annotation + + self.logger.error( + "Line %s: Unable to resolve duplicates for GeneID: %s.\nexisting gene: %s\nnew gene: %s", curr_line_num, new_gene_annotation.id, existing_gene_annotation, @@ -660,7 +849,6 @@ def __merge_values(self, t): dict: A dictionary where each key maps to a set of values. """ - self.logger.debug("Merging values") result = defaultdict(set) for lst in t: key = lst[0].strip() @@ -670,45 +858,85 @@ def __merge_values(self, t): return result def serialize_to_jsonld( - self, output_file: str, exclude_none: bool = True, exclude_unset: bool = False + self, exclude_none: bool = True, exclude_unset: bool = False ): """ - Serialize the object and write it to the specified output file. + Serialize the object and either write it to the specified output file or print it to the CLI. Parameters: - output_file (str): The path of the output file. 
+ exclude_none (bool): Whether to exclude None values in the output. + exclude_unset (bool): Whether to exclude unset values in the output. Returns: None """ - logger.debug("Serializing to JSON-LD") - with open(output_file, "w", encoding="utf-8") as f: - data = [ - self.organism_taxon.dict( - exclude_none=exclude_none, exclude_unset=exclude_unset - ), - self.genome_assembly.dict( - exclude_none=exclude_none, exclude_unset=exclude_unset - ), - self.genome_annotation.dict( - exclude_none=exclude_none, exclude_unset=exclude_unset - ), - ] - for ck in self.checksums: - data.append( - ck.dict(exclude_none=exclude_none, exclude_unset=exclude_unset) - ) - for ga in self.gene_annotations.values(): - data.append( - ga.dict(exclude_none=exclude_none, exclude_unset=exclude_unset) - ) - output_data = { - "@context": "https://raw.githubusercontent.com/brain-bican/models/main/jsonld-context-autogen/genome_annotation.context.jsonld", - "@graph": data, - } - f.write(json.dumps(output_data, indent=2)) + data = [ + self.organism_taxon.dict( + exclude_none=exclude_none, exclude_unset=exclude_unset + ), + self.genome_assembly.dict( + exclude_none=exclude_none, exclude_unset=exclude_unset + ), + self.genome_annotation.dict( + exclude_none=exclude_none, exclude_unset=exclude_unset + ), + ] + for ck in self.checksums: + data.append(ck.dict(exclude_none=exclude_none, exclude_unset=exclude_unset)) + for ga in self.gene_annotations.values(): + data.append(ga.dict(exclude_none=exclude_none, exclude_unset=exclude_unset)) + + output_data = { + "@context": "https://raw.githubusercontent.com/brain-bican/models/main/jsonld-context-autogen/genome_annotation.context.jsonld", + "@graph": data, + } + + print(json.dumps(output_data, indent=2)) + + +@click.command() +##ARGUEMENTS## +# Argument #1: The URL of the GFF file +@click.argument("content_url", type=str) + +##OPTIONS## +# Option #1: The ID of the genome assembly +@click.option("assembly_accession", "-a", required=False, default=None, type=str) +# Option #2: The strain of the genome assembly +@click.option( + "--assembly_strain", + "-s", + required=False, + default=None, + type=str, + help="The strain of the genome assembly. Defaults to None.", +) +# Option #3: The log level +@click.option( + "--log_level", + "-l", + required=False, + default="WARNING", + help="The log level. Defaults to WARNING.", +) +# Option #4: Log to file +@click.option( + "--log_to_file", + "-f", + is_flag=True, + help="Log to a file instead of the console.", +) +def gff2jsonld(content_url, assembly_accession, assembly_strain, log_level, log_to_file): + ''' + Creates GeneAnnotation objects from a GFF3 file and serializes them to JSON-LD format. 
+ ''' + gff3 = Gff3( + content_url, assembly_accession, assembly_strain, log_level, log_to_file + ) + gff3.parse() + gff3.serialize_to_jsonld() if __name__ == "__main__": - pass \ No newline at end of file + gff2jsonld() diff --git a/bkbit/model_editors/add_dunderMethods_genomeAnnotation.py b/bkbit/model_editors/add_dunderMethods_genomeAnnotation.py index 31cb1cf..507e988 100644 --- a/bkbit/model_editors/add_dunderMethods_genomeAnnotation.py +++ b/bkbit/model_editors/add_dunderMethods_genomeAnnotation.py @@ -1,23 +1,29 @@ import re # Read the file -file_path = "../models/genome_annotation.py" +file_path = "bkbit/models/genome_annotation.py" with open(file_path, "r") as file: content = file.read() +# Define the patterns to check if the functions already exist +hash_pattern = r"def __hash__\(self\):" + # Find the GeneAnnotation class -pattern = r"class GeneAnnotation\(Gene\):\s+\"\"\"\n An annotation describing the location, boundaries, and functions of individual genes within a genome annotation.\n \"\"\"" -match = re.search(pattern, content) +class_pattern = r"class GeneAnnotation\(Gene\):\s+\"\"\"\n An annotation describing the location, boundaries, and functions of individual genes within a genome annotation.\n \"\"\"" +class_match = re.search(class_pattern, content) + +if class_match: + class_start = class_match.end() -if match: - # Add the function to the class - updated_content = content.replace(match.group(), match.group() + "\n def __ne__(self, other):\n return (self.description != other.description) or (self.molecular_type != other.molecular_type)\n ") - updated_content = updated_content.replace(match.group(), match.group() + "\n def __eq__(self, other):\n return (self.description == other.description) and (self.molecular_type == other.molecular_type)\n ") - updated_content = updated_content.replace(match.group(), match.group() + "\n\n def __hash__(self):\n return hash(tuple([self.id, self.name, self.molecular_type, self.description]))\n ") + # Check if the functions already exist + has_hash = re.search(hash_pattern, content[class_start:]) + + # Add the functions only if they do not exist + if not has_hash: + content = content.replace(class_match.group(), class_match.group() + "\n\n def __hash__(self):\n return hash(tuple([self.id, self.name, self.molecular_type, self.description]))\n ") # Write the updated content back to the file with open(file_path, "w") as file: - file.write(updated_content) + file.write(content) else: print("GeneAnnotation class not found in the file.") - diff --git a/bkbit/models/genome_annotation.py b/bkbit/models/genome_annotation.py index a10acbc..d83ba6b 100644 --- a/bkbit/models/genome_annotation.py +++ b/bkbit/models/genome_annotation.py @@ -412,6 +412,10 @@ class GeneAnnotation(Gene): """ An annotation describing the location, boundaries, and functions of individual genes within a genome annotation. 
""" + + def __hash__(self): + return hash(tuple([self.id, self.name, self.molecular_type, self.description])) + molecular_type: Optional[Union[BioType, str]] = Field(None) source_id: Optional[str] = Field(None, description="""The authority specific identifier.""") referenced_in: Union[GenomeAnnotation, str] = Field(..., description="""The genome annotation that this gene annotation was referenced from.""") diff --git a/bkbit/utils/get_ncbi_taxonomy.py b/bkbit/utils/get_ncbi_taxonomy.py new file mode 100644 index 0000000..2b57023 --- /dev/null +++ b/bkbit/utils/get_ncbi_taxonomy.py @@ -0,0 +1,187 @@ +""" +This script downloads a zip file containing taxonomic data from a given URL, extracts and processes +the content of the 'names.dmp' file in memory, and saves the parsed data into JSON files. The script +includes three main functions: + +1. download_and_extract_zip_in_memory(url): + Downloads a zip file from the given URL and extracts the content of the 'names.dmp' file in memory. + +2. parse_dmp_content(dmp_content): + Parses the content of a DMP file and extracts taxonomic information into dictionaries. + +3. process_and_save_taxdmp_in_memory(url, output_dir): + Downloads and processes the taxdump file from the given URL, and saves the parsed data into + separate JSON files in the specified output directory. + +Usage: + The script can be executed as a standalone program. Modify the URL and output directory as needed. +""" + +import json +import zipfile +import io +import os +import requests +import pkg_resources +import click + +NCBI_TAXON_URL = "https://ftp.ncbi.nih.gov/pub/taxonomy/taxdmp.zip" +OUTPUT_DIR_NAME = "ncbi_taxonomy" +OUTPUT_DIR_PATH = pkg_resources.resource_filename(__name__, OUTPUT_DIR_NAME) +SCIENTIFIC_NAME_TO_TAXONID_PATH = pkg_resources.resource_filename(__name__, "ncbi_taxonomy/scientific_name_to_taxid.json") +TAXON_SCIENTIFIC_NAME_PATH = pkg_resources.resource_filename(__name__, "ncbi_taxonomy/taxid_to_scientific_name.json") +TAXON_COMMON_NAME_PATH = pkg_resources.resource_filename(__name__, "ncbi_taxonomy/taxid_to_common_name.json") + + + +def download_and_extract_zip_in_memory(url): + """ + Downloads a zip file from the given URL and extracts the content of the 'names.dmp' file in memory. + + Args: + url (str): The URL of the zip file to download. + + Returns: + str: The content of the 'names.dmp' file as a string. + + Raises: + requests.exceptions.HTTPError: If the file download fails with a non-200 status code. + """ + # Download the file + response = requests.get(url, timeout=30) + if response.status_code == 200: + # Unzip the file in memory + with zipfile.ZipFile(io.BytesIO(response.content)) as z: + # Extract names.dmp file content into memory + with z.open("names.dmp") as names_dmp_file: + names_dmp_content = names_dmp_file.read().decode("utf-8") + return names_dmp_content + else: + raise requests.exceptions.HTTPError( + f"Failed to download file, status code: {response.status_code}" + ) + + +def parse_dmp_content(dmp_content): + """ + Parses the content of a DMP file and extracts taxonomic information. + + Args: + dmp_content (str): The content of the DMP file. + + Returns: + tuple: A tuple containing three dictionaries: + - taxid_to_scientific_name: A dictionary mapping taxonomic IDs to scientific names. + - taxid_to_common_name: A dictionary mapping taxonomic IDs to common names. + - scientific_name_to_taxid: A dictionary mapping scientific names to taxonomic IDs. 
+ """ + taxid_to_scientific_name = {} + taxid_to_common_name = {} + scientific_name_to_taxid = {} + + for line in dmp_content.strip().split("\n"): + # Split the line by the delimiter '|' + parts = line.strip().split("|") + + # Remove leading and trailing whitespace from each part + parts = [part.strip() for part in parts] + # Taxonomy names file (names.dmp): + # tax_id-- the id of node associated with this name + # name_txt-- name itself + # unique name-- the unique variant of this name if name not unique + # name class-- (synonym, common name, ...) + taxid = parts[0] + name = parts[1] + unique_name = parts[2] + name_class = parts[3] + + # Create a dictionary with the parsed data + if name_class == "scientific name" and taxid not in taxid_to_scientific_name: + if unique_name: + taxid_to_scientific_name[taxid] = unique_name + scientific_name_to_taxid[unique_name] = taxid + else: + taxid_to_scientific_name[taxid] = name + scientific_name_to_taxid[name] = taxid + elif name_class == "genbank common name" and taxid not in taxid_to_common_name: + taxid_to_common_name[taxid] = name + return taxid_to_scientific_name, taxid_to_common_name, scientific_name_to_taxid + + +def process_and_save_taxdmp_in_memory(url, output_dir): + """ + Downloads and processes the taxdump file from the given URL, + and saves the parsed data into separate JSON files in the specified output directory. + + Args: + url (str): The URL of the taxdump file to download and process. + output_dir (str): The directory where the parsed data will be saved. + + Returns: + None + """ + # Ensure the output directory exists + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Step 1: Download and unzip the folder in memory + names_dmp_content = download_and_extract_zip_in_memory(url) + + # Step 2: Parse the names.dmp content + taxid_to_scientific_name, taxid_to_common_name, scientific_name_to_taxid = ( + parse_dmp_content(names_dmp_content) + ) + + # Step 3: Save the dictionaries to files + with open( + os.path.join(output_dir, "taxid_to_common_name.json"), "w", encoding="utf-8" + ) as f: + json.dump(taxid_to_common_name, f, indent=4) + + with open( + os.path.join(output_dir, "taxid_to_scientific_name.json"), "w", encoding="utf-8" + ) as f: + json.dump(taxid_to_scientific_name, f, indent=4) + + with open( + os.path.join(output_dir, "scientific_name_to_taxid.json"), "w", encoding="utf-8" + ) as f: + json.dump(scientific_name_to_taxid, f, indent=4) + + + +def load_json(file_path): + """ + Load JSON data from a file. + + Args: + file_path (str): The path to the JSON file. + + Returns: + dict: The loaded JSON data. + + """ + with open(file_path, "r", encoding="utf-8") as f: + return json.load(f) + +@click.command() +@click.option("--reload", '-r', is_flag=True, help="Reload NCBI taxonomy data") + +def download_ncbi_taxonomy(reload=False): + + """ + Load JSON data from a file. + + Args: + file_path (str): The path to the JSON file. + + Returns: + dict: The loaded JSON data as a dictionary. 
+ """ + if reload or not os.path.exists(SCIENTIFIC_NAME_TO_TAXONID_PATH) or not os.path.exists(TAXON_SCIENTIFIC_NAME_PATH) or not os.path.exists(TAXON_COMMON_NAME_PATH): + process_and_save_taxdmp_in_memory(NCBI_TAXON_URL, OUTPUT_DIR_PATH) + else: + print("PRINT already downloaded") + +if __name__ == "__main__": + download_ncbi_taxonomy() diff --git a/bkbit/utils/load_json.py b/bkbit/utils/load_json.py new file mode 100644 index 0000000..d16b6bf --- /dev/null +++ b/bkbit/utils/load_json.py @@ -0,0 +1,19 @@ +import json + +def load_json(file_path): + """ + Load a JSON file from the given file path. + + Args: + file_path (str): The path to the JSON file. + + Returns: + dict: The contents of the JSON file as a dictionary. + + Raises: + FileNotFoundError: If the file does not exist. + JSONDecodeError: If the file is not a valid JSON. + + """ + with open(file_path, "r", encoding="utf-8") as f: + return json.load(f) \ No newline at end of file diff --git a/bkbit/utils/setup_logger.py b/bkbit/utils/setup_logger.py new file mode 100644 index 0000000..9d4f772 --- /dev/null +++ b/bkbit/utils/setup_logger.py @@ -0,0 +1,80 @@ +""" +Logger Setup Module + +This module provides a utility function to configure and set up logging for an application. +The `setup_logger` function allows for customizable logging levels and output destinations, +either to a file or to the console. + +Available log levels: +- DEBUG +- INFO +- WARNING +- ERROR +- CRITICAL + +Example usage: + from setup_logger import setup_logger + import logging + + # Set up the logger to log to a file with INFO level + logger = setup_logger(log_level="INFO", log_to_file=True) + + # Log some messages + logger.info("This is an info message") + logger.error("This is an error message") + +Functions: + setup_logger(log_level="WARNING", log_to_file=False): + Configures and returns a logger with the specified log level and output destination. + +Attributes: + LOG_LEVELS (dict): A dictionary mapping log level names to their corresponding logging constants. +""" + +import logging + +LOG_LEVELS = { + "DEBUG": logging.DEBUG, + "INFO": logging.INFO, + "WARNING": logging.WARNING, + "ERROR": logging.ERROR, + "CRITICAL": logging.CRITICAL, +} + + +def setup_logger( + file_name, + log_level="WARNING", + log_to_file=False, +): + """ + Set up a logger with the specified log level and log destination. + + Args: + log_level (str, optional): The desired log level. Defaults to "WARNING". + log_to_file (bool, optional): Whether to log to a file. Defaults to False. + + Returns: + logger: The configured logger object. + + Raises: + ValueError: If an invalid log level is provided. 
+ """ + if log_level.upper() not in LOG_LEVELS: + raise ValueError(f"Invalid log level: {log_level}") + if log_to_file: + logging.basicConfig( + filename=file_name, + format="%(levelname)s: %(message)s (%(asctime)s)", + datefmt="%m/%d/%Y %I:%M:%S %p", + level=LOG_LEVELS[log_level.upper()], + ) + else: + logging.basicConfig( + format="%(levelname)s: %(message)s (%(asctime)s)", + datefmt="%m/%d/%Y %I:%M:%S %p", + level=LOG_LEVELS[log_level.upper()], + ) + + logger = logging.getLogger(__name__) + return logger diff --git a/pyproject.toml b/pyproject.toml index e8d96ef..db7caa2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,9 +6,9 @@ build-backend = 'setuptools.build_meta' [project] name = "bkbit" authors = [ - { name="bkbit developers"}, + { name="brain-bican-bkbit developers"}, ] -description = "A library for atlaskb data models" +description = "A library for using brain-bican data models" readme = "README.md" requires-python = ">=3.7" classifiers = [ @@ -36,7 +36,7 @@ docs = [ ] [project.urls] -"Homepage" = "https://github.com/atlaskb/bkbit" +"Homepage" = "https://github.com/brain-bican/bkbit" [tool.setuptools_scm] write_to = "bkbit/_version.py" \ No newline at end of file