Skip to content

Commit

Permalink
Merge branch 'feature/run-recipes-from-firebase' of https://github.co…
Browse files Browse the repository at this point in the history
…m/mesoscope/cellpack into feature/upload-to-s3
  • Loading branch information
rugeli committed Jul 5, 2023
2 parents ba82714 + e921c35 commit 09f1534
Show file tree
Hide file tree
Showing 10 changed files with 326 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ dask-worker-space

# dotenv
.env
.creds

# virtualenv
.venv
Expand Down
123 changes: 119 additions & 4 deletions cellpack/autopack/DBRecipeHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ def as_dict(self):
data["regions"] = self.regions
return data

@staticmethod
def get_gradient_reference(downloaded_data, db):
if "gradient" in downloaded_data and db.is_reference(
downloaded_data["gradient"]
):
gradient_key = downloaded_data["gradient"]
downloaded_data["gradient"], _ = db.get_doc_by_ref(gradient_key)

@staticmethod
def get_reference_data(key_or_dict, db):
"""
Expand All @@ -66,12 +74,14 @@ def get_reference_data(key_or_dict, db):
if DataDoc.is_key(key_or_dict) and db.is_reference(key_or_dict):
key = key_or_dict
downloaded_data, _ = db.get_doc_by_ref(key)
CompositionDoc.get_gradient_reference(downloaded_data, db)
return downloaded_data, None
elif key_or_dict and isinstance(key_or_dict, dict):
object_dict = key_or_dict
if "object" in object_dict and db.is_reference(object_dict["object"]):
key = object_dict["object"]
downloaded_data, _ = db.get_doc_by_ref(key)
CompositionDoc.get_gradient_reference(downloaded_data, db)
return downloaded_data, key
return {}, None

Expand All @@ -96,19 +106,40 @@ def resolve_db_regions(self, db_data, db):
):
self.resolve_db_regions(downloaded_data, db)

@staticmethod
def gradient_list_to_dict(prep_recipe_data):
"""
Convert gradient list to dict for resolve_local_regions
"""
if "gradients" in prep_recipe_data and isinstance(
prep_recipe_data["gradients"], list
):
gradient_dict = {}
for gradient in prep_recipe_data["gradients"]:
gradient_dict[gradient["name"]] = gradient
prep_recipe_data["gradients"] = gradient_dict

def resolve_local_regions(self, local_data, recipe_data, db):
"""
Recursively resolves the regions of a composition from local data.
Restructure the local data to match the db data.
"""
unpack_recipe_data = DBRecipeHandler.prep_data_for_db(recipe_data)
prep_recipe_data = ObjectDoc.convert_representation(unpack_recipe_data, db)
# `gradients` is a list, convert it to dict for easy access and replace
CompositionDoc.gradient_list_to_dict(prep_recipe_data)
if "object" in local_data and local_data["object"] is not None:
if DataDoc.is_key(local_data["object"]):
key_name = local_data["object"]
else:
key_name = local_data["object"]["name"]
local_data["object"] = prep_recipe_data["objects"][key_name]
if "gradient" in local_data["object"] and isinstance(
local_data["object"]["gradient"], str
):
local_data["object"]["gradient"] = prep_recipe_data["gradients"][
local_data["object"]["gradient"]
]
for region_name in local_data["regions"]:
for index, key_or_dict in enumerate(local_data["regions"][region_name]):
if not DataDoc.is_key(key_or_dict):
Expand All @@ -121,6 +152,12 @@ def resolve_local_regions(self, local_data, recipe_data, db):
local_data["regions"][region_name][index][
"object"
] = prep_recipe_data["objects"][obj_item["name"]]
# replace gradient reference with gradient data
obj_data = local_data["regions"][region_name][index]["object"]
if "gradient" in obj_data and isinstance(obj_data["gradient"], str):
local_data["regions"][region_name][index]["object"][
"gradient"
] = prep_recipe_data["gradients"][obj_data["gradient"]]
else:
comp_name = local_data["regions"][region_name][index]
prep_comp_data = prep_recipe_data["composition"][comp_name]
Expand Down Expand Up @@ -296,11 +333,29 @@ def should_write(self, db):
return None, None


class GradientDoc(DataDoc):
    """Wraps a single gradient's settings for upload to / lookup in the db."""

    def __init__(self, settings):
        super().__init__()
        # raw gradient settings dict as it appears in the recipe
        self.settings = settings

    def should_write(self, db, grad_name):
        """
        Return (doc, id) of an existing db gradient identical to the local
        settings, or (None, None) when this gradient still needs uploading.

        :param db: database handler
        :param grad_name: name of the gradient to search for
        """
        docs = db.get_doc_by_name("gradients", grad_name)
        if docs:
            # BUG FIX: diff the *local* settings against each stored doc.
            # The previous code prepped the db doc and diffed it against
            # itself, so any gradient with a matching name was treated as
            # identical regardless of its settings.
            local_data = DBRecipeHandler.prep_data_for_db(self.settings)
            for doc in docs:
                db_data = db.doc_to_dict(doc)
                difference = DeepDiff(db_data, local_data, ignore_order=True)
                if not difference:
                    return doc, db.doc_id(doc)
        return None, None


class DBRecipeHandler(object):
    """Uploads recipe data to, and downloads it from, a database handler."""

    def __init__(self, db_handler):
        # database wrapper (e.g. FirebaseHandler) used for all reads/writes
        self.db = db_handler
        # caches mapping uploaded item names -> their db paths, so later
        # uploads can reference already-stored documents instead of re-uploading
        self.objects_to_path_map = {}
        self.comp_to_path_map = {}
        self.grad_to_path_map = {}

@staticmethod
def is_nested_list(item):
Expand All @@ -310,6 +365,14 @@ def is_nested_list(item):
and isinstance(item[0], (list, tuple))
)

@staticmethod
def is_db_dict(item):
if isinstance(item, dict) and len(item) > 0:
for key, value in item.items():
if key.isdigit() and isinstance(value, list):
return True
return False

@staticmethod
def prep_data_for_db(data):
"""
Expand Down Expand Up @@ -355,10 +418,29 @@ def upload_data(self, collection, data, id=None):
doc = self.db.set_doc(collection, id, modified_data)
return id, self.db.create_path(collection, id)

def upload_gradients(self, gradients):
    """
    Upload each gradient to the db (unless an identical one already
    exists) and record its db path in grad_to_path_map so objects can
    later reference the gradient by path.

    :param gradients: list of gradient dicts, each with a "name" key
    """
    for gradient in gradients:
        gradient_name = gradient["name"]
        gradient_doc = GradientDoc(settings=gradient)
        _, doc_id = gradient_doc.should_write(self.db, gradient_name)
        if doc_id:
            # fixed message grammar: "is already exists" -> "already exists"
            print(f"gradients/{gradient_name} already exists in firestore")
            self.grad_to_path_map[gradient_name] = self.db.create_path(
                "gradients", doc_id
            )
        else:
            _, grad_path = self.upload_data("gradients", gradient_doc.settings)
            self.grad_to_path_map[gradient_name] = grad_path

def upload_objects(self, objects):
for obj_name in objects:
objects[obj_name]["name"] = obj_name
object_doc = ObjectDoc(name=obj_name, settings=objects[obj_name])
modify_objects = copy.deepcopy(objects)
# replace gradient name with path before uploading
if "gradient" in modify_objects[obj_name]:
grad_name = modify_objects[obj_name]["gradient"]
modify_objects[obj_name]["gradient"] = self.grad_to_path_map[grad_name]
object_doc = ObjectDoc(name=obj_name, settings=modify_objects[obj_name])
_, doc_id = object_doc.should_write(self.db)
if doc_id:
print(f"objects/{object_doc.name} is already in firestore")
Expand Down Expand Up @@ -416,18 +498,20 @@ def get_recipe_id(self, recipe_data):
"""
recipe_name = recipe_data["name"]
recipe_version = recipe_data["version"]
key = f"{recipe_name}_v{recipe_version}"
key = f"{recipe_name}_v-{recipe_version}"
return key

def upload_collections(self, recipe_meta_data, recipe_data):
"""
Separate collections from recipe data and upload them to db
"""
recipe_to_save = copy.deepcopy(recipe_meta_data)
gradients = recipe_data.get("gradients")
objects = recipe_data["objects"]
compositions = recipe_data["composition"]
# TODO: test gradients recipes
# gradients = recipe_data.get("gradients")
# save gradients to db
if gradients:
self.upload_gradients(gradients)
# save objects to db
self.upload_objects(objects)
# save comps to db
Expand Down Expand Up @@ -461,3 +545,34 @@ def upload_recipe(self, recipe_meta_data, recipe_data):
recipe_to_save = self.upload_collections(recipe_meta_data, recipe_data)
key = self.get_recipe_id(recipe_to_save)
self.upload_data("recipes", recipe_to_save, key)

def prep_db_doc_for_download(self, db_doc):
    """
    Convert a recipe document fetched from the db into local recipe
    format, unpacking db-dict-packed lists and resolving composition
    references.

    :param db_doc: dict of recipe data from the db; returned unchanged
        in shape but NOT mutated (the original mutated
        db_doc["composition"] in place, a side effect on the caller's data).
    :return: new dict with resolved data; empty dict when db_doc is not a dict
    """
    prep_data = {}
    if not isinstance(db_doc, dict):
        return prep_data
    for key, value in db_doc.items():
        if self.is_db_dict(value):
            # db stores lists as {"0": [...], "1": [...]}; unpack in index order
            prep_data[key] = [value[str(i)] for i in range(len(value))]
        elif key == "composition":
            resolved_compositions = {}
            for comp_name, reference in value.items():
                ref_link = reference["inherit"]
                comp_doc = CompositionDoc(
                    comp_name,
                    object_key=None,
                    count=None,
                    regions={},
                    molarity=None,
                )
                # fetch the referenced composition and recursively
                # resolve its nested region references
                composition_data, _ = comp_doc.get_reference_data(
                    ref_link, self.db
                )
                comp_doc.resolve_db_regions(composition_data, self.db)
                resolved_compositions[comp_name] = composition_data
            prep_data[key] = resolved_compositions
        else:
            prep_data[key] = value
    return prep_data
30 changes: 29 additions & 1 deletion cellpack/autopack/FirebaseHandler.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import firebase_admin
import ast
from firebase_admin import credentials, firestore
from cellpack.autopack.loaders.utils import read_json_file, write_json_file


class FirebaseHandler(object):
"""
Retrieve data and perform common tasks when working with firebase.
"""

def __init__(self, cred_path):
def __init__(self):
    """Initialize the firebase app and a firestore client from local creds."""
    # Reads ./.creds, prompting the user for a service-account path if missing.
    # NOTE(review): despite the name, get_creds() returns the parsed
    # credential dict, not a filesystem path; credentials.Certificate
    # accepts either — confirm against firebase_admin docs.
    cred_path = FirebaseHandler.get_creds()
    login = credentials.Certificate(cred_path)
    firebase_admin.initialize_app(login)
    self.db = firestore.client()
Expand All @@ -17,6 +20,29 @@ def __init__(self, cred_path):
def doc_to_dict(doc):
    # Convert a firestore document snapshot into a plain dict.
    return doc.to_dict()

@staticmethod
def write_creds_path():
    """
    Prompt the user for the path to a firebase service-account JSON file,
    store its contents under the "firebase" key of ./.creds (merging with
    any existing creds), and return {"firebase": data}.

    :raises ValueError: when the given path does not resolve to a readable JSON file
    """
    # Plain input() instead of ast.literal_eval(input()): literal_eval
    # required users to wrap the path in quotes and raised on bare paths;
    # also dropped the leftover debug print of the path.
    path = input("provide path to firebase credentials: ").strip()
    data = read_json_file(path)
    if data is None:
        raise ValueError("The path to your credentials doesn't exist")
    firebase_cred = {"firebase": data}
    creds = read_json_file("./.creds")
    if creds is None:
        write_json_file("./.creds", firebase_cred)
    else:
        # keep other entries (e.g. "username") intact
        creds["firebase"] = data
        write_json_file("./.creds", creds)
    return firebase_cred

@staticmethod
def get_creds():
    """
    Return the firebase credential entry from ./.creds, prompting the
    user to supply one when the file is missing or lacks a "firebase" key.
    """
    stored = read_json_file("./.creds")
    if stored is not None and "firebase" in stored:
        return stored["firebase"]
    return FirebaseHandler.write_creds_path()["firebase"]

def db_name(self):
    # Return the handler's database name. NOTE(review): `self.name` is
    # assigned elsewhere in this class (not visible in this view) — confirm.
    return self.name

Expand Down Expand Up @@ -51,6 +77,8 @@ def update_elements_in_array(doc_ref, index, new_item_ref, remove_item):

@staticmethod
def is_reference(path):
if not isinstance(path, str):
return False
if path is None:
return False
if path.startswith("firebase:"):
Expand Down
30 changes: 25 additions & 5 deletions cellpack/autopack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,17 @@
import re
import shutil
from os import path, environ
import pwd
from pathlib import Path
import urllib.request as urllib
from collections import OrderedDict
import ssl
import json

from cellpack.autopack.interface_objects.meta_enum import MetaEnum
from cellpack.autopack.FirebaseHandler import FirebaseHandler
from cellpack.autopack.DBRecipeHandler import DBRecipeHandler
from cellpack.autopack.loaders.utils import read_json_file, write_json_file


packageContainsVFCommands = 1
Expand Down Expand Up @@ -281,7 +285,8 @@ def is_remote_path(file_path):
@param file_path: str
"""
for ele in DATABASE_NAME:
return ele in file_path
if ele in file_path:
return ele in file_path


def convert_db_shortname_to_url(file_location):
Expand Down Expand Up @@ -381,12 +386,17 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None)


def load_file(filename, destination="", cache="geometries", force=None):
# what is the param destination for? should we use it to store db names?
if is_remote_path(filename):
database_name, file_path = convert_db_shortname_to_url(filename)
# command example: `pack -r firebase:recipes/peroxisomes_surface_gradient_v-linear -c examples/packing-configs/run.json`
if database_name == "firebase":
# TODO: read from firebase
# return data
pass
recipe_id = file_path.split("/")[-1]
db = FirebaseHandler()
db_doc, _ = db.get_doc_by_id(collection="recipes", id=recipe_id)
db_handler = DBRecipeHandler(db)
downloaded_recipe_data = db_handler.prep_db_doc_for_download(db_doc)
return downloaded_recipe_data, database_name
else:
local_file_path = get_local_file_location(
file_path, destination=destination, cache=cache, force=force
Expand All @@ -395,7 +405,7 @@ def load_file(filename, destination="", cache="geometries", force=None):
local_file_path = get_local_file_location(
filename, destination=destination, cache=cache, force=force
)
return json.load(open(local_file_path, "r"))
return json.load(open(local_file_path, "r")), None


def fixPath(adict): # , k, v):
Expand Down Expand Up @@ -531,6 +541,15 @@ def clearCaches(*args):
print("problem cleaning ", cache_dir[k])


def write_username_to_creds():
    """
    Record the current OS login name under "username" in ./.creds,
    preserving any other entries already stored there.
    """
    creds = read_json_file("./.creds")
    if creds is None:
        creds = {}
    if "username" not in creds:
        # BUG FIX: the previous code reset creds to {} whenever "username"
        # was missing, clobbering an existing "firebase" credential entry.
        creds["username"] = pwd.getpwuid(os.getuid())[0]
        write_json_file("./.creds", creds)


# we should read a file to fill the RECIPE Dictionary
# so we can add some and write/save setup
# afdir or user_pref
Expand All @@ -539,6 +558,7 @@ def clearCaches(*args):
checkPath()
updatePathJSON()
checkRecipeAvailable()
write_username_to_creds()
log.info("path are updated ")

log.info(f"currently number recipes is {len(RECIPES)}")
Expand Down
3 changes: 2 additions & 1 deletion cellpack/autopack/loaders/migrate_v1_to_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def split_ingredient_data(object_key, ingredient_data):

def get_and_store_v2_object(ingredient_key, ingredient_data, region_list, objects_dict):
if "include" in ingredient_data:
ingredient_data = load_file(ingredient_data["include"], cache="recipes")
ingredient_data, _ = load_file(ingredient_data["include"], cache="recipes")
check_required_attributes(ingredient_data)
converted_ingredient = migrate_ingredient(ingredient_data)
object_info, composition_info = split_ingredient_data(
Expand All @@ -145,6 +145,7 @@ def convert(old_recipe):
new_recipe["name"] = old_recipe["recipe"]["name"]
new_recipe["bounding_box"] = old_recipe["options"]["boundingBox"]
objects_dict = {}
# TODO: check if composition structure is correct
composition = {"space": {"regions": {}}}
if "cytoplasme" in old_recipe:
outer_most_region_array = []
Expand Down
Loading

0 comments on commit 09f1534

Please sign in to comment.