Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

staging: run recipes from firebase #179

Merged
merged 45 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
94ec7cd
[wip] prep recipe data for packing
rugeli Jun 14, 2023
2968f38
get creds from local file
meganrm Jun 15, 2023
6014501
save firebase creds to a .creds file
meganrm Jun 15, 2023
fb441f0
remove cred arg
rugeli Jun 20, 2023
0481d25
check for already handled values in remote recipes
rugeli Jun 20, 2023
b5b38c9
can pack one sphere
rugeli Jun 26, 2023
e921c35
* adding username to .creds
mogres Jul 5, 2023
0a3afff
move `write_username_to_creds`
mogres Jul 5, 2023
2dbfaaf
download recipe testing
rugeli Jul 10, 2023
b419a57
edit comment
rugeli Jul 10, 2023
f77163d
Merge branch 'feature/run-recipes-from-firebase' of https://github.co…
rugeli Jul 10, 2023
a0cd352
code refactor
rugeli Jul 10, 2023
bafba75
lint
rugeli Jul 10, 2023
a928c1d
format tests
rugeli Jul 13, 2023
cc0e19a
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli Aug 18, 2023
1a8b715
add prep_db_doc
rugeli Aug 21, 2023
6cad8ee
changed class name in DBRecipeHandler
rugeli Aug 23, 2023
28af176
fix lint and test errors
rugeli Aug 23, 2023
68660b3
initialize firebase handler only once
rugeli Aug 25, 2023
14b3876
refactor message
rugeli Aug 25, 2023
70e1b12
add remote db options in `pack`
rugeli Aug 28, 2023
09fcbef
remove a print statement
rugeli Aug 28, 2023
f864fc6
rename and reorg DB handler
rugeli Sep 5, 2023
64dc318
fix tests
rugeli Sep 5, 2023
7672a93
move database_ids enum to interface_objects
rugeli Sep 6, 2023
ad77057
remove db_handler in pack and recipe_loader
rugeli Sep 6, 2023
f4c78ae
send db_handler in to autopack
rugeli Sep 6, 2023
1908692
rename functions
rugeli Sep 6, 2023
88abfbe
integrate DATABASE_NAMES into interface_objects
rugeli Sep 7, 2023
dfe19c8
lint
rugeli Sep 7, 2023
7209086
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli Sep 27, 2023
cc97646
Feature/run inherited objects (#198)
rugeli Oct 12, 2023
7e2c633
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli Oct 17, 2023
0d577fe
Merge branch 'feature/run-recipes-from-firebase' of https://github.co…
rugeli Oct 17, 2023
44c3cb2
formatting
rugeli Oct 17, 2023
7c0aae3
testing and refactor
rugeli Oct 23, 2023
1f5a0d8
Merge branch 'main' of https://github.com/mesoscope/cellpack into fea…
rugeli Oct 23, 2023
6f01cfd
Feature/save metadata to firebase (#206)
rugeli Nov 6, 2023
3e691a2
refactor
rugeli Nov 10, 2023
7364a86
Update .gitignore
rugeli Dec 12, 2023
c4b9786
add file existence check
rugeli Dec 12, 2023
4e392ea
Merge branch 'feature/run-recipes-from-firebase' of https://github.co…
rugeli Dec 12, 2023
55a313b
refactor is_nested_list method
rugeli Dec 12, 2023
74c43b3
revert write_json_file
rugeli Dec 13, 2023
93787fc
formatting
rugeli Dec 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ dask-worker-space

# dotenv
.env
.creds
rugeli marked this conversation as resolved.
Show resolved Hide resolved

# virtualenv
.venv
Expand Down
182 changes: 160 additions & 22 deletions cellpack/autopack/DBRecipeHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,28 @@ def should_write():
def is_key(string_or_dict):
return not isinstance(string_or_dict, dict)

@staticmethod
def is_nested_list(item):
return (
isinstance(item, list)
and len(item) > 0
and isinstance(item[0], (list, tuple))
)

@staticmethod
def is_db_dict(item):
if isinstance(item, dict) and len(item) > 0:
for key, value in item.items():
if key.isdigit() and isinstance(value, list):
return True
return False

@staticmethod
def is_obj(comp_or_obj):
# in resolved DB data, if the top level of a downloaded comp doesn't have the key `name`, it's an obj
# TODO: true for all cases? better approaches?
return not comp_or_obj.get("name") and "object" in comp_or_obj


class CompositionDoc(DataDoc):
"""
Expand Down Expand Up @@ -124,7 +146,7 @@ def resolve_local_regions(self, local_data, recipe_data, db):
Recursively resolves the regions of a composition from local data.
Restructure the local data to match the db data.
"""
unpack_recipe_data = DBRecipeHandler.prep_data_for_db(recipe_data)
unpack_recipe_data = DBUploader.prep_data_for_db(recipe_data)
prep_recipe_data = ObjectDoc.convert_representation(unpack_recipe_data, db)
# `gradients` is a list, convert it to dict for easy access and replace
CompositionDoc.gradient_list_to_dict(prep_recipe_data)
Expand Down Expand Up @@ -321,7 +343,7 @@ def should_write(self, db):
# if there is repr in the obj doc from db
full_doc_data = ObjectDoc.convert_representation(doc, db)
# unpack objects to dicts in local data for comparison
local_data = DBRecipeHandler.prep_data_for_db(self.as_dict())
local_data = DBUploader.prep_data_for_db(self.as_dict())
difference = DeepDiff(full_doc_data, local_data, ignore_order=True)
if not difference:
return doc, db.doc_id(doc)
Expand All @@ -337,29 +359,25 @@ def should_write(self, db, grad_name):
docs = db.get_doc_by_name("gradients", grad_name)
if docs and len(docs) >= 1:
for doc in docs:
local_data = DBRecipeHandler.prep_data_for_db(db.doc_to_dict(doc))
local_data = DBUploader.prep_data_for_db(db.doc_to_dict(doc))
db_data = db.doc_to_dict(doc)
difference = DeepDiff(db_data, local_data, ignore_order=True)
if not difference:
return doc, db.doc_id(doc)
return None, None


class DBRecipeHandler(object):
class DBUploader(object):
"""
Handles the uploading of data to the database.
"""

def __init__(self, db_handler):
    """
    Set up an uploader backed by the given database handler.

    Parameters
    ----------
    db_handler : object
        Database interface (e.g. a FirebaseHandler) used for all writes.
    """
    self.db = db_handler
    # name -> db path maps, filled in as each collection is uploaded so
    # that later documents can reference already-uploaded ones
    self.objects_to_path_map = {}
    self.comp_to_path_map = {}
    self.grad_to_path_map = {}

@staticmethod
def is_nested_list(item):
return (
isinstance(item, list)
and len(item) > 0
and isinstance(item[0], (list, tuple))
)

@staticmethod
def prep_data_for_db(data):
    """
    Recursively convert recipe data into a db-safe form.

    2D arrays (nested lists) become dicts keyed by stringified indices,
    and objects carrying instance state are unpacked into plain dicts.

    Parameters
    ----------
    data : dict
        Recipe data to prepare for upload.

    Returns
    -------
    dict
        A converted copy of `data`; the input is not mutated.
    """
    modified_data = {}
    for key, value in data.items():
        # convert 2d array to dict keyed by stringified indices
        if DataDoc.is_nested_list(value):
            flatten_dict = dict(zip([str(i) for i in range(len(value))], value))
            modified_data[key] = DBUploader.prep_data_for_db(flatten_dict)
        # if the value is an object with instance state, convert it to a dict
        # (the previous `isinstance(value, object)` guard was dropped: every
        # value is an `object`, so it was always True)
        elif "__dict__" in dir(value):
            unpacked_value = vars(value)
            modified_data[key] = unpacked_value
            if isinstance(unpacked_value, dict):
                modified_data[key] = DBUploader.prep_data_for_db(unpacked_value)
        # if the value is a dictionary, recursively convert its nested lists
        elif isinstance(value, dict):
            modified_data[key] = DBUploader.prep_data_for_db(value)
        else:
            modified_data[key] = value
    return modified_data
Expand All @@ -391,7 +407,7 @@ def upload_data(self, collection, data, id=None):
If should_write is true, upload the data to the database
"""
# check if we need to convert part of the data(2d arrays and objs to dict)
modified_data = DBRecipeHandler.prep_data_for_db(data)
modified_data = DBUploader.prep_data_for_db(data)
if id is None:
name = modified_data["name"]
doc = self.db.upload_doc(collection, modified_data)
Expand Down Expand Up @@ -482,7 +498,7 @@ def upload_compositions(self, compositions, recipe_to_save, recipe_data):
references_to_update[comp_name].update({"comp_id": doc_id})
return references_to_update

def get_recipe_id(self, recipe_data):
def _get_recipe_id(self, recipe_data):
"""
We use customized recipe id to declare recipe's name and version
"""
Expand Down Expand Up @@ -526,12 +542,134 @@ def upload_recipe(self, recipe_meta_data, recipe_data):
"""
After all other collections are checked or uploaded, upload the recipe with references into db
"""
recipe_id = self.get_recipe_id(recipe_data)
recipe_id = self._get_recipe_id(recipe_data)
# if the recipe already exists in the db, just return
recipe, _ = self.db.get_doc_by_id("recipes", recipe_id)
if recipe:
print(f"{recipe_id} is already in firestore")
return
recipe_to_save = self.upload_collections(recipe_meta_data, recipe_data)
key = self.get_recipe_id(recipe_to_save)
key = self._get_recipe_id(recipe_to_save)
self.upload_data("recipes", recipe_to_save, key)


class DBRecipeLoader(object):
    """
    Handles the logic for downloading and parsing the recipe data from the database.
    """

    def __init__(self, db_handler):
        # db_handler: database interface (e.g. a FirebaseHandler) used for reads
        self.db = db_handler

    def prep_db_doc_for_download(self, db_doc):
        """
        convert data from db and resolve references.
        """
        prep_data = {}
        if isinstance(db_doc, dict):
            for key, value in db_doc.items():
                if DataDoc.is_db_dict(value):
                    # the db stores 2D arrays as dicts keyed by stringified
                    # indices; rebuild the original ordered list
                    unpack_dict = [value[str(i)] for i in range(len(value))]
                    prep_data[key] = unpack_dict
                elif key == "composition":
                    compositions = db_doc["composition"]
                    for comp_name, reference in compositions.items():
                        # each composition entry stores a reference path under
                        # "inherit"; fetch and fully resolve that document
                        ref_link = reference["inherit"]
                        comp_doc = CompositionDoc(
                            comp_name,
                            object_key=None,
                            count=None,
                            regions={},
                            molarity=None,
                        )
                        composition_data, _ = comp_doc.get_reference_data(
                            ref_link, self.db
                        )
                        comp_doc.resolve_db_regions(composition_data, self.db)
                        compositions[comp_name] = composition_data
                    prep_data[key] = compositions
                else:
                    prep_data[key] = value
        return prep_data

    def collect_docs_by_id(self, collection, id):
        # thin pass-through to the db handler's lookup by document id
        return self.db.get_doc_by_id(collection, id)

    @staticmethod
    def _get_grad_and_obj(obj_data, obj_dict, grad_dict):
        """
        Split gradient data out of a downloaded object.

        Stores the gradient dict in `grad_dict` under its name, and replaces
        the object's embedded gradient dict with that name reference.
        Returns the (possibly updated) `obj_dict` and `grad_dict`.
        """
        try:
            grad_name = obj_data["gradient"]["name"]
            obj_name = obj_data["name"]
        except KeyError as e:
            # malformed object: leave inputs unchanged
            print(f"Missing keys in object: {e}")
            return obj_dict, grad_dict

        grad_dict[grad_name] = obj_data["gradient"]
        obj_dict[obj_name]["gradient"] = grad_name
        return obj_dict, grad_dict

    @staticmethod
    def collect_and_sort_data(comp_data):
        """
        Collect all object and gradient info from the downloaded composition data
        Return autopack object data dict and gradient data dict with name as key
        Return restructured composition dict with "composition" as key
        """
        objects = {}
        gradients = {}
        composition = {}
        for comp_name, comp_value in comp_data.items():
            composition[comp_name] = {}
            if "count" in comp_value and comp_value["count"] is not None:
                composition[comp_name]["count"] = comp_value["count"]
            if "object" in comp_value and comp_value["object"] is not None:
                # reference the object by name; keep a deep copy in `objects`
                # so gradient extraction can't mutate the downloaded data
                composition[comp_name]["object"] = comp_value["object"]["name"]
                object_copy = copy.deepcopy(comp_value["object"])
                objects[object_copy["name"]] = object_copy
                if "gradient" in object_copy and isinstance(
                    object_copy["gradient"], dict
                ):
                    objects, gradients = DBRecipeLoader._get_grad_and_obj(
                        object_copy, objects, gradients
                    )
            if "regions" in comp_value and comp_value["regions"] is not None:
                for region_name in comp_value["regions"]:
                    composition[comp_name].setdefault("regions", {})[region_name] = []
                    for region_item in comp_value["regions"][region_name]:
                        if DataDoc.is_obj(region_item):
                            # region entry is an object: record a name/count
                            # reference and collect the object (and gradient)
                            composition[comp_name]["regions"][region_name].append(
                                {
                                    "object": region_item["object"].get("name"),
                                    "count": region_item.get("count"),
                                }
                            )
                            object_copy = copy.deepcopy(region_item["object"])
                            objects[object_copy["name"]] = object_copy
                            if "gradient" in object_copy and isinstance(
                                object_copy["gradient"], dict
                            ):
                                objects, gradients = DBRecipeLoader._get_grad_and_obj(
                                    object_copy, objects, gradients
                                )
                        else:
                            # region entry is a nested composition: reference by name
                            composition[comp_name]["regions"][region_name].append(
                                region_item["name"]
                            )
        return objects, gradients, composition

    @staticmethod
    def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict):
        """
        Compile recipe data from db recipe data into a ready-to-pack structure
        """
        recipe_data = {
            **{
                k: db_recipe_data[k]
                for k in ["format_version", "version", "name", "bounding_box"]
            },
            "objects": obj_dict,
            "composition": comp_dict,
        }
        if grad_dict:
            # gradients are collected keyed by name; packing expects a list
            recipe_data["gradients"] = [{**v} for v in grad_dict.values()]
        return recipe_data
31 changes: 29 additions & 2 deletions cellpack/autopack/FirebaseHandler.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import firebase_admin
import ast
from firebase_admin import credentials, firestore
from cellpack.autopack.loaders.utils import read_json_file, write_json_file


class FirebaseHandler(object):
"""
Retrieve data and perform common tasks when working with firebase.
"""

def __init__(self, cred_path):
def __init__(self):
    # credentials are resolved from the local ./.creds file (prompting the
    # user on first run) instead of being passed in by the caller
    cred_path = FirebaseHandler.get_creds()
    login = credentials.Certificate(cred_path)
    # NOTE(review): initialize_app raises if an app was already initialized
    # in this process — callers appear to construct this handler only once
    firebase_admin.initialize_app(login)
    self.db = firestore.client()
Expand All @@ -17,6 +20,28 @@ def __init__(self, cred_path):
def doc_to_dict(doc):
    """Convert a firestore document snapshot to a plain dict of its fields."""
    return doc.to_dict()

@staticmethod
def write_creds_path():
    """
    Prompt the user for a firebase credentials JSON file, cache its contents
    under the "firebase" key in the local ./.creds file, and return them.

    Returns
    -------
    dict
        {"firebase": <credential data read from the provided path>}

    Raises
    ------
    ValueError
        If the provided path does not point to a readable JSON file.
    """
    # Accept a bare path; tolerate surrounding quotes. The previous
    # `ast.literal_eval(input(...))` required the user to type the path as a
    # quoted Python string literal — an unquoted path raised an error.
    path = input("provide path to firebase credentials: ").strip().strip("\"'")
    data = read_json_file(path)
    if data is None:
        raise ValueError("The path to your credentials doesn't exist")
    firebase_cred = {"firebase": data}
    creds = read_json_file("./.creds")
    if creds is None:
        # no .creds yet: create it with only the firebase entry
        write_json_file("./.creds", firebase_cred)
    else:
        # preserve any other entries already stored in .creds
        creds["firebase"] = data
        write_json_file("./.creds", creds)
    return firebase_cred

@staticmethod
def get_creds():
    """
    Return the firebase credentials stored in the local ./.creds file,
    prompting the user for a credentials path when none are cached yet.
    """
    stored = read_json_file("./.creds")
    if stored is not None and "firebase" in stored:
        return stored["firebase"]
    # nothing cached (or no firebase entry): ask the user and cache the result
    return FirebaseHandler.write_creds_path()["firebase"]

def db_name(self):
    # returns self.name — presumably assigned in __init__ (not visible in
    # this view); TODO confirm where `name` is set
    return self.name

Expand Down Expand Up @@ -87,7 +112,9 @@ def set_doc(self, collection, id, data):
print(f"successfully uploaded to path: {doc_ref.path}")
return doc_ref
else:
print(f"ERROR, already data at this path:{collection}/{id}")
print(
f"ERROR: {doc_ref.path} already exists. If uploading new data, provide a unique recipe name."
)
return

def upload_doc(self, collection, data):
Expand Down
Loading
Loading