Skip to content

Commit

Permalink
save CAS zip support
Browse files Browse the repository at this point in the history
  • Loading branch information
hkir-dev committed Jul 8, 2024
1 parent d80a9b4 commit fbbe7ee
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name="tdta",
version="0.1.0.dev15",
version="0.1.0.dev16",
description="The aim of this project is to provide taxonomy development tools custom actions.",
long_description=README,
long_description_content_type="text/markdown",
Expand Down
40 changes: 40 additions & 0 deletions src/tdta/tdt_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
import ast
import json
import typing
import zipfile
import shutil
from typing import Union, List
from contextlib import closing
from pathlib import Path
from datetime import datetime

from tdta.utils import read_project_config
from tdta.command_line_utils import runcmd
from cas.model import (CellTypeAnnotation, Annotation, Labelset, AnnotationTransfer, AutomatedAnnotation, Review)
from cas.file_utils import write_json_file
from cas.matrix_file.resolver import resolve_matrix_file
Expand All @@ -18,6 +21,9 @@

cas_table_names = ["annotation", "labelset", "metadata", "annotation_transfer", "review"]

GITHUB_SIZE_LIMIT = 50 * 1000 * 1000 # 50 MB
# GITHUB_SIZE_LIMIT = 2 * 1000


def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str = None):
"""
Expand Down Expand Up @@ -62,9 +68,43 @@ def export_cas_data(sqlite_db: str, output_file: str, dataset_cache_folder: str
write_json_file(cta, output_file, False)

print("CAS json successfully created at: {}".format(output_file))
ensure_file_size_limit(output_file)
return cta


def ensure_file_size_limit(file_path):
"""
Checks if the file size exceeds the GitHub size limit and zips the file if needed.
Parameters:
file_path: file path to check
"""
if os.path.getsize(file_path) > GITHUB_SIZE_LIMIT:
zip_path = zip_file(file_path)
folder = os.path.dirname(file_path)
is_git_repo = runcmd("cd {dir} && git rev-parse --is-inside-work-tree".format(dir=folder)).strip()
if is_git_repo == "true":
runcmd("cd {dir} && git reset {file_path}".format(dir=folder, file_path=file_path))
runcmd("cd {dir} && git add {zip_path}".format(dir=folder, zip_path=zip_path))


def zip_file(file_path):
"""
Zips the file into smaller parts if it exceeds the GitHub size limit.
Parameters:
file_path: file path to zip
Returns: zipped file path
"""
folder = os.path.dirname(file_path)
base_name = os.path.basename(file_path)
zip_base = os.path.splitext(base_name)[0]

single_zip_path = os.path.join(folder, f"{zip_base}.zip")
with zipfile.ZipFile(single_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
zipf.write(file_path, base_name)
print("File zipped due to GitHub size limits: " + single_zip_path)
return single_zip_path


def parse_metadata_data(cta, sqlite_db, table_name):
"""
Reads 'Metadata' table data into the CAS object
Expand Down

0 comments on commit fbbe7ee

Please sign in to comment.